From 0b8b212ab34bd4c9e8f76ef50e4a4a563ce8b636 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:10:07 -0800 Subject: [PATCH 1/4] Introduce HyperlightVm as a transparent wrapper Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/hyperlight_vm.rs | 99 +++++++++++++++++++ src/hyperlight_host/src/hypervisor/mod.rs | 2 + .../src/sandbox/initialized_multi_use.rs | 7 +- .../src/sandbox/uninitialized_evolve.rs | 15 +-- 4 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 src/hyperlight_host/src/hypervisor/hyperlight_vm.rs diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs new file mode 100644 index 000000000..e5c1634ff --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -0,0 +1,99 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use crate::Result; + +use crate::hypervisor::{Hypervisor, InterruptHandle}; +use crate::mem::memory_region::MemoryRegion; +use crate::mem::mgr::SandboxMemoryManager; +use crate::mem::ptr::RawPtr; +use crate::mem::shared_mem::HostSharedMemory; +use crate::sandbox::host_funcs::FunctionRegistry; + +use log::LevelFilter; +use std::sync::{Arc, Mutex}; + +#[derive(Debug)] +pub(crate) struct HyperlightVm { + pub(crate) vm: Box, +} + +impl HyperlightVm { + pub(crate) fn new(inner: Box) -> Self { + Self { vm: inner } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn initialise( + &mut self, + peb_addr: RawPtr, + seed: u64, + page_size: u32, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + guest_max_log_level: Option, + #[cfg(gdb)] dbg_mem_access_fn: Arc>>, + ) -> Result<()> { + self.vm.initialise( + peb_addr, + seed, + page_size, + mem_mgr, + host_funcs, + guest_max_log_level, + #[cfg(gdb)] + dbg_mem_access_fn, + ) + } + + pub(crate) unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + unsafe { self.vm.map_region(rgn) } + } + + pub(crate) unsafe fn unmap_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + unsafe { self.vm.unmap_region(rgn) } + } + + pub(crate) fn get_mapped_regions( + &self, + ) -> Box + '_> { + self.vm.get_mapped_regions() + } + + pub(crate) fn dispatch_call_from_host( + &mut self, + dispatch_func_addr: RawPtr, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + #[cfg(gdb)] dbg_mem_access_fn: Arc>>, + ) -> Result<()> { + self.vm.dispatch_call_from_host( + dispatch_func_addr, + mem_mgr, + host_funcs, + #[cfg(gdb)] + dbg_mem_access_fn, + ) + } + + pub(crate) fn interrupt_handle(&self) -> Arc { + self.vm.interrupt_handle() + } + + pub(crate) fn clear_cancel(&self) { + self.vm.clear_cancel() + } +} diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index be094a009..55ed2f465 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ 
b/src/hyperlight_host/src/hypervisor/mod.rs @@ -59,6 +59,8 @@ pub(crate) mod wrappers; #[cfg(crashdump)] pub(crate) mod crashdump; +pub(crate) mod hyperlight_vm; + use std::fmt::Debug; use std::str::FromStr; #[cfg(any(kvm, mshv3))] diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index f1d71fad9..afb065047 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -37,6 +37,7 @@ use super::host_funcs::FunctionRegistry; use super::snapshot::Snapshot; use crate::HyperlightError::{self, SnapshotSandboxMismatch}; use crate::func::{ParameterTuple, SupportedReturnType}; +use crate::hypervisor::hyperlight_vm::HyperlightVm; use crate::hypervisor::{Hypervisor, InterruptHandle}; #[cfg(unix)] use crate::mem::memory_region::MemoryRegionType; @@ -97,7 +98,7 @@ pub struct MultiUseSandbox { poisoned: bool, pub(super) host_funcs: Arc>, pub(crate) mem_mgr: SandboxMemoryManager, - vm: Box, + vm: HyperlightVm, dispatch_ptr: RawPtr, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, @@ -116,7 +117,7 @@ impl MultiUseSandbox { pub(super) fn from_uninit( host_funcs: Arc>, mgr: SandboxMemoryManager, - vm: Box, + vm: HyperlightVm, dispatch_ptr: RawPtr, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> MultiUseSandbox { @@ -711,7 +712,7 @@ impl MultiUseSandbox { #[cfg(crashdump)] #[instrument(err(Debug), skip_all, parent = Span::current())] pub fn generate_crashdump(&self) -> Result<()> { - crate::hypervisor::crashdump::generate_crashdump(self.vm.as_ref()) + crate::hypervisor::crashdump::generate_crashdump(self.vm.vm.as_ref() as &dyn Hypervisor) } /// Returns whether the sandbox is currently poisoned. 
diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index ba4680617..a5be963a3 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -26,6 +26,7 @@ use super::hypervisor::{HypervisorType, get_available_hypervisor}; use super::uninitialized::SandboxRuntimeConfig; use crate::HyperlightError::NoHypervisorFound; use crate::hypervisor::Hypervisor; +use crate::hypervisor::hyperlight_vm::HyperlightVm; use crate::mem::exe::LoadInfo; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::mgr::SandboxMemoryManager; @@ -106,7 +107,7 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration, #[cfg(any(crashdump, gdb))] rt_cfg: &SandboxRuntimeConfig, _load_info: LoadInfo, -) -> Result> { +) -> Result { #[cfg(feature = "init-paging")] let rsp_ptr = { let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?; @@ -167,7 +168,7 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(feature = "mem_profile")] let trace_info = MemTraceInfo::new(_load_info)?; - match *get_available_hypervisor() { + let vm: Box = match *get_available_hypervisor() { #[cfg(mshv3)] Some(HypervisorType::Mshv) => { let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new( @@ -183,7 +184,7 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(feature = "mem_profile")] trace_info, )?; - Ok(Box::new(hv)) + Box::new(hv) } #[cfg(kvm)] @@ -201,7 +202,7 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(feature = "mem_profile")] trace_info, )?; - Ok(Box::new(hv)) + Box::new(hv) } #[cfg(target_os = "windows")] @@ -225,13 +226,15 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(feature = "mem_profile")] trace_info, )?; - Ok(Box::new(hv)) + Box::new(hv) } _ => { log_then_return!(NoHypervisorFound()); } - } + }; + + Ok(HyperlightVm::new(vm)) } #[cfg(test)] From 
3954d0aa27c1116c18f5bb041d5b6e86f4c1aa5a Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:15:08 -0800 Subject: [PATCH 2/4] Implement run, handle_io, memory mapping Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/hypervisor/gdb/mod.rs | 2 +- .../src/hypervisor/hyperlight_vm.rs | 359 +++++++++++++++++- .../src/hypervisor/hyperv_linux.rs | 230 +++-------- .../src/hypervisor/hyperv_windows.rs | 25 +- src/hyperlight_host/src/hypervisor/kvm.rs | 251 ++---------- src/hyperlight_host/src/hypervisor/mod.rs | 287 +++----------- src/hyperlight_host/src/mem/memory_region.rs | 8 +- .../src/sandbox/initialized_multi_use.rs | 14 +- .../src/sandbox/uninitialized_evolve.rs | 4 +- 9 files changed, 491 insertions(+), 689 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/gdb/mod.rs b/src/hyperlight_host/src/hypervisor/gdb/mod.rs index fb4829ef0..66467c931 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/mod.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/mod.rs @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -mod arch; +pub(crate) mod arch; mod event_loop; #[cfg(target_os = "windows")] mod hyperv_debug; diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index e5c1634ff..dde4c2024 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -14,26 +14,96 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -use crate::Result; +use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; +use crate::{HyperlightError, Result, log_then_return, new_error}; -use crate::hypervisor::{Hypervisor, InterruptHandle}; -use crate::mem::memory_region::MemoryRegion; +#[cfg(any(kvm, mshv3))] +use crate::hypervisor::LinuxInterruptHandle; +use crate::hypervisor::{ + HyperlightExit, Hypervisor, InterruptHandle, InterruptHandleImpl, MemoryAccess, + get_memory_access_violation, +}; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::mgr::SandboxMemoryManager; use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; +use crate::sandbox::SandboxConfiguration; use crate::sandbox::host_funcs::FunctionRegistry; +use crate::sandbox::outb::handle_outb; use log::LevelFilter; +#[cfg(any(kvm, mshv3))] +#[cfg(not(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" +)))] +use std::sync::atomic::AtomicU64; +#[cfg(any(kvm, mshv3))] +use std::sync::atomic::{AtomicBool, AtomicU8}; use std::sync::{Arc, Mutex}; +use tracing::{Span, instrument}; #[derive(Debug)] pub(crate) struct HyperlightVm { pub(crate) vm: Box, + interrupt_handle: Arc, + + sandbox_regions: Vec, // Initially mapped regions when sandbox is created + mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region) + next_slot: u32, // Monotonically increasing slot number + freed_slots: Vec, // Reusable slots from unmapped regions } impl HyperlightVm { - pub(crate) fn new(inner: Box) -> Self { - Self { vm: inner } + pub(crate) fn new( + mut vm: Box, + mem_regions: Vec, + config: &SandboxConfiguration, + ) -> Result { + #[cfg(any(kvm, mshv3))] + let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { + state: AtomicU8::new(0), + #[cfg(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + ))] + tid: AtomicU64::new(unsafe { libc::pthread_self() as 
u64 }), + #[cfg(not(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + )))] + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + retry_delay: config.get_interrupt_retry_delay(), + sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), + dropped: AtomicBool::new(false), + }); + + #[cfg(target_os = "windows")] + let interrupt_handle: Arc = Arc::new(WindowsInterruptHandle { + state: AtomicU8::new(0), + partition_handle: vm.partition_handle(), + dropped: AtomicBool::new(false), + }); + + for (i, region) in mem_regions.iter().enumerate() { + // Safety: slots are unique and region points to valid memory since we created the regions + unsafe { vm.map_memory((i as u32, region))? }; + } + + Ok(Self { + vm, + interrupt_handle, + next_slot: mem_regions.len() as u32, + sandbox_regions: mem_regions, + mmap_regions: Vec::new(), + freed_slots: Vec::new(), + }) } #[allow(clippy::too_many_arguments)] @@ -56,21 +126,41 @@ impl HyperlightVm { guest_max_log_level, #[cfg(gdb)] dbg_mem_access_fn, - ) + )?; + self.run(mem_mgr, host_funcs) } - pub(crate) unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { - unsafe { self.vm.map_region(rgn) } + // Safety: The caller must ensure that the memory region is valid and points to valid memory, + pub(crate) unsafe fn map_region(&mut self, region: &MemoryRegion) -> Result<()> { + // Try to reuse a freed slot first, otherwise use next_slot + let slot = if let Some(freed_slot) = self.freed_slots.pop() { + freed_slot + } else { + let slot = self.next_slot; + self.next_slot += 1; + slot + }; + + // Safety: slots are unique. It's up to caller to ensure that the region is valid + unsafe { self.vm.map_memory((slot, region))? 
}; + self.mmap_regions.push((slot, region.clone())); + Ok(()) } - pub(crate) unsafe fn unmap_region(&mut self, rgn: &MemoryRegion) -> Result<()> { - unsafe { self.vm.unmap_region(rgn) } + pub(crate) fn unmap_region(&mut self, region: &MemoryRegion) -> Result<()> { + if let Some(pos) = self.mmap_regions.iter().position(|(_, r)| r == region) { + let (slot, _) = self.mmap_regions.remove(pos); + self.freed_slots.push(slot); + self.vm.unmap_memory((slot, region))?; + } else { + return Err(new_error!("Region not found in mapped regions")); + } + + Ok(()) } - pub(crate) fn get_mapped_regions( - &self, - ) -> Box + '_> { - self.vm.get_mapped_regions() + pub(crate) fn get_mapped_regions(&self) -> impl Iterator { + self.mmap_regions.iter().map(|(_, region)| region) } pub(crate) fn dispatch_call_from_host( @@ -86,7 +176,8 @@ impl HyperlightVm { host_funcs, #[cfg(gdb)] dbg_mem_access_fn, - ) + )?; + self.run(mem_mgr, host_funcs) } pub(crate) fn interrupt_handle(&self) -> Arc { @@ -96,4 +187,242 @@ impl HyperlightVm { pub(crate) fn clear_cancel(&self) { self.vm.clear_cancel() } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + fn handle_io( + &mut self, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + port: u16, + data: Vec, + ) -> Result<()> { + if data.is_empty() { + log_then_return!("no data was given in IO interrupt"); + } + + #[allow(clippy::get_first)] + let val = u32::from_le_bytes([ + data.get(0).copied().unwrap_or(0), + data.get(1).copied().unwrap_or(0), + data.get(2).copied().unwrap_or(0), + data.get(3).copied().unwrap_or(0), + ]); + + #[cfg(feature = "mem_profile")] + { + let regs = self.vm.regs()?; + handle_outb(mem_mgr, host_funcs, port, val, ®s, &mut self.trace_info)?; + } + + #[cfg(not(feature = "mem_profile"))] + { + handle_outb(mem_mgr, host_funcs, port, val)?; + } + + Ok(()) + } + + fn run( + &mut self, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + #[cfg(gdb)] dbg_mem_access_fn: Arc>>, + ) -> 
Result<()> { + // Keeps the trace context and open spans + #[cfg(feature = "trace_guest")] + let mut tc = crate::sandbox::trace::TraceContext::new(); + + let result = loop { + // ===== KILL() TIMING POINT 2: Before set_tid() ===== + // If kill() is called and ran to completion BEFORE this line executes: + // - CANCEL_BIT will be set and we will return an early HyperlightExit::Cancelled() + // without sending any signals/WHV api calls + #[cfg(any(kvm, mshv3))] + self.interrupt_handle.set_tid(); + self.interrupt_handle.set_running(); + // NOTE: `set_running()`` must be called before checking `is_cancelled()` + // otherwise we risk missing a call to `kill()` because the vcpu would not be marked as running yet so signals won't be sent + + let exit_reason = if self.interrupt_handle.is_cancelled() + || self.interrupt_handle.is_debug_interrupted() + { + Ok(HyperlightExit::Cancelled()) + } else { + #[cfg(feature = "trace_guest")] + tc.setup_guest_trace(Span::current().context()); + + // ==== KILL() TIMING POINT 3: Before calling run() ==== + // If kill() is called and ran to completion BEFORE this line executes: + // - Will still do a VM entry, but signals will be sent until VM exits + let result = self.vm.run_vcpu(); + + // End current host trace by closing the current span that captures traces + // happening when a guest exits and re-enters. + #[cfg(feature = "trace_guest")] + tc.end_host_trace(); + + // Handle the guest trace data if any + #[cfg(feature = "trace_guest")] + { + let regs = self.vm.regs()?; + if let Err(e) = tc.handle_trace(®s, mem_mgr) { + // If no trace data is available, we just log a message and continue + // Is this the right thing to do? + log::debug!("Error handling guest trace: {:?}", e); + } + } + result + }; + + // ===== KILL() TIMING POINT 4: Before clear_running() ===== + // If kill() is called and ran to completion BEFORE this line executes: + // - CANCEL_BIT will be set. Cancellation is deferred to the next iteration. 
+ // - Signals will be sent until `clear_running()` is called, which is ok + self.interrupt_handle.clear_running(); + + // ===== KILL() TIMING POINT 5: Before capturing cancel_requested ===== + // If kill() is called and ran to completion BEFORE this line executes: + // - CANCEL_BIT will be set. Cancellation is deferred to the next iteration. + // - Signals will not be sent + let cancel_requested = self.interrupt_handle.is_cancelled(); + let debug_interrupted = self.interrupt_handle.is_debug_interrupted(); + + // ===== KILL() TIMING POINT 6: Before checking exit_reason ===== + // If kill() is called and ran to completion BEFORE this line executes: + // - CANCEL_BIT will be set. Cancellation is deferred to the next iteration. + // - Signals will not be sent + match exit_reason { + #[cfg(gdb)] + Ok(HyperlightExit::Debug { dr6, exception }) => { + // Handle debug event (breakpoints) + let stop_reason = + arch::vcpu_stop_reason(self.vm.as_mut(), dr6, self.entrypoint, exception)?; + if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { + break Err(e); + } + } + + Ok(HyperlightExit::Halt()) => { + break Ok(()); + } + Ok(HyperlightExit::IoOut(port, data)) => { + self.handle_io(mem_mgr, host_funcs, port, data)? + } + Ok(HyperlightExit::MmioRead(addr)) => { + let all_regions = self + .sandbox_regions + .iter() + .chain(self.mmap_regions.iter().map(|(_, r)| r)); + match get_memory_access_violation( + addr as usize, + MemoryRegionFlags::WRITE, + all_regions, + ) { + Some(MemoryAccess::StackGuardPageViolation) => { + break Err(HyperlightError::StackOverflow()); + } + Some(MemoryAccess::AccessViolation(region_flags)) => { + break Err(HyperlightError::MemoryAccessViolation( + addr, + MemoryRegionFlags::READ, + region_flags, + )); + } + None => { + if !mem_mgr.check_stack_guard()? 
{ + break Err(HyperlightError::StackOverflow()); + } + + break Err(new_error!("MMIO READ access address {:#x}", addr)); + } + } + } + Ok(HyperlightExit::MmioWrite(addr)) => { + let all_regions = self + .sandbox_regions + .iter() + .chain(self.mmap_regions.iter().map(|(_, r)| r)); + match get_memory_access_violation( + addr as usize, + MemoryRegionFlags::WRITE, + all_regions, + ) { + Some(MemoryAccess::StackGuardPageViolation) => { + break Err(HyperlightError::StackOverflow()); + } + Some(MemoryAccess::AccessViolation(region_flags)) => { + break Err(HyperlightError::MemoryAccessViolation( + addr, + MemoryRegionFlags::WRITE, + region_flags, + )); + } + None => { + if !mem_mgr.check_stack_guard()? { + break Err(HyperlightError::StackOverflow()); + } + + break Err(new_error!("MMIO WRITE access address {:#x}", addr)); + } + } + } + Ok(HyperlightExit::Cancelled()) => { + // If cancellation was not requested for this specific guest function call, + // the vcpu was interrupted by a stale cancellation. 
This can occur when: + // - Linux: A signal from a previous call arrives late + // - Windows: WHvCancelRunVirtualProcessor called right after vcpu exits but RUNNING_BIT is still true + if !cancel_requested && !debug_interrupted { + // Track that an erroneous vCPU kick occurred + metrics::counter!(METRIC_ERRONEOUS_VCPU_KICKS).increment(1); + // treat this the same as a HyperlightExit::Retry, the cancel was not meant for this call + continue; + } + + // If the vcpu was interrupted by a debugger, we need to handle it + #[cfg(gdb)] + { + self.interrupt_handle.clear_debug_interrupt(); + if let Err(e) = + self.handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Interrupt) + { + break Err(e); + } + } + + metrics::counter!(METRIC_GUEST_CANCELLATION).increment(1); + break Err(HyperlightError::ExecutionCanceledByHost()); + } + Ok(HyperlightExit::Unknown(reason)) => { + break Err(new_error!("Unexpected VM Exit: {:?}", reason)); + } + Ok(HyperlightExit::Retry()) => continue, + Err(e) => { + break Err(e); + } + } + }; + + match result { + Ok(_) => Ok(()), + Err(HyperlightError::ExecutionCanceledByHost()) => { + // no need to crashdump this + Err(HyperlightError::ExecutionCanceledByHost()) + } + Err(e) => { + #[cfg(crashdump)] + if self.rt_cfg.guest_core_dump { + crashdump::generate_crashdump(self)?; + } + + // If GDB is enabled, we handle the debug memory access + // Disregard return value as we want to return the error + #[cfg(gdb)] + if self.gdb_conn.is_some() { + self.handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Crash)?; + } + + log_then_return!(e); + } + } + } } diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 0dd7ad240..08e559156 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -45,11 +45,11 @@ use super::gdb::{ DebugCommChannel, DebugMemoryAccess, DebugMsg, DebugResponse, GuestDebug, MshvDebug, VcpuStopReason, 
}; -use super::{HyperlightExit, Hypervisor, LinuxInterruptHandle, VirtualCPU}; +use super::{Hypervisor, LinuxInterruptHandle}; #[cfg(gdb)] use crate::HyperlightError; use crate::hypervisor::regs::CommonFpu; -use crate::hypervisor::{InterruptHandle, InterruptHandleImpl, get_memory_access_violation}; +use crate::hypervisor::{InterruptHandle, InterruptHandleImpl, HyperlightExit}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::mgr::SandboxMemoryManager; use crate::mem::ptr::{GuestPtr, RawPtr}; @@ -273,9 +273,6 @@ pub(crate) struct HypervLinuxDriver { entrypoint: u64, interrupt_handle: Arc, - sandbox_regions: Vec, // Initially mapped regions when sandbox is created - mmap_regions: Vec, // Later mapped regions - #[cfg(gdb)] debug: Option, #[cfg(gdb)] @@ -299,7 +296,6 @@ impl HypervLinuxDriver { // TODO: refactor this function to take fewer arguments. Add trace_info to rt_cfg #[instrument(skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn new( - mem_regions: Vec, entrypoint_ptr: GuestPtr, rsp_ptr: GuestPtr, pml4_ptr: GuestPtr, @@ -365,11 +361,6 @@ impl HypervLinuxDriver { (None, None) }; - mem_regions.iter().try_for_each(|region| { - let mshv_region = region.to_owned().into(); - vm_fd.map_user_memory(mshv_region) - })?; - let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { state: AtomicU8::new(0), #[cfg(all( @@ -396,8 +387,6 @@ impl HypervLinuxDriver { page_size: 0, vm_fd, vcpu_fd, - sandbox_regions: mem_regions, - mmap_regions: Vec::new(), entrypoint: entrypoint_ptr.absolute()?, orig_rsp: rsp_ptr, interrupt_handle: interrupt_handle.clone(), @@ -431,13 +420,6 @@ impl Debug for HypervLinuxDriver { f.field("Entrypoint", &self.entrypoint) .field("Original RSP", &self.orig_rsp); - for region in &self.sandbox_regions { - f.field("Sandbox Memory Region", ®ion); - } - for region in &self.mmap_regions { - f.field("Mapped Memory Region", ®ion); - } - let regs = self.vcpu_fd.get_regs(); if let Ok(regs) = regs { @@ -488,51 
+470,22 @@ impl Hypervisor for HypervLinuxDriver { }; self.vcpu_fd.set_regs(®s)?; - let interrupt_handle = self.interrupt_handle.clone(); - - VirtualCPU::run( - self.as_mut_hypervisor(), - interrupt_handle, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - ) + Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { - if [ - rgn.guest_region.start, - rgn.guest_region.end, - rgn.host_region.start, - rgn.host_region.end, - ] - .iter() - .any(|x| x % self.page_size != 0) - { - log_then_return!("region is not page-aligned"); - } - let mshv_region: mshv_user_mem_region = rgn.to_owned().into(); + /// # Safety + /// The caller must ensure that the memory region is valid and points to valid memory, + /// and lives long enough for the VM to use it. + unsafe fn map_memory(&mut self, (_slot, region): (u32, &MemoryRegion)) -> Result<()> { + let mshv_region: mshv_user_mem_region = region.into(); self.vm_fd.map_user_memory(mshv_region)?; - self.mmap_regions.push(rgn.to_owned()); Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - unsafe fn unmap_region(&mut self, region: &MemoryRegion) -> Result<()> { - if let Some(pos) = self.mmap_regions.iter().position(|r| r == region) { - let removed_region = self.mmap_regions.remove(pos); - let mshv_region: mshv_user_mem_region = removed_region.into(); - self.vm_fd.unmap_user_memory(mshv_region)?; - Ok(()) - } else { - Err(new_error!("Tried to unmap region that is not mapped")) - } - } - - fn get_mapped_regions(&self) -> Box + '_> { - Box::new(self.mmap_regions.iter()) + fn unmap_memory(&mut self, (_slot, region): (u32, &MemoryRegion)) -> Result<()> { + let mshv_region: mshv_user_mem_region = region.into(); + self.vm_fd.unmap_user_memory(mshv_region)?; + Ok(()) } #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -555,61 +508,11 @@ impl Hypervisor for 
HypervLinuxDriver { // reset fpu state self.set_fpu(&CommonFpu::default())?; - let interrupt_handle = self.interrupt_handle.clone(); - // run - VirtualCPU::run( - self.as_mut_hypervisor(), - interrupt_handle, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - ) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn handle_io( - &mut self, - port: u16, - data: Vec, - rip: u64, - instruction_length: u64, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - ) -> Result<()> { - let mut padded = [0u8; 4]; - let copy_len = data.len().min(4); - padded[..copy_len].copy_from_slice(&data[..copy_len]); - let val = u32::from_le_bytes(padded); - - #[cfg(feature = "mem_profile")] - { - let regs = self.regs()?; - let trace_info = self.trace_info_mut(); - handle_outb(mem_mgr, host_funcs, port, val, ®s, trace_info)?; - } - #[cfg(not(feature = "mem_profile"))] - { - handle_outb(mem_mgr, host_funcs, port, val)?; - } - - // update rip - self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_RIP, - value: hv_register_value { - reg64: rip + instruction_length, - }, - ..Default::default() - }])?; Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn run( - &mut self, - #[cfg(feature = "trace_guest")] tc: &mut crate::sandbox::trace::TraceContext, - ) -> Result { + fn run_vcpu(&mut self) -> Result { const HALT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_HALT; const IO_PORT_INTERCEPT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; @@ -618,97 +521,67 @@ impl Hypervisor for HypervLinuxDriver { #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; - #[cfg(feature = "trace_guest")] - tc.setup_guest_trace(Span::current().context()); - let exit_reason = self.vcpu_fd.run(); let result = match exit_reason { Ok(m) => match m.header.message_type { - HALT_MESSAGE => { - crate::debug!("mshv - Halt 
Details : {:#?}", &self); - HyperlightExit::Halt() - } + HALT_MESSAGE => HyperlightExit::Halt(), IO_PORT_INTERCEPT_MESSAGE => { let io_message = m.to_ioport_info().map_err(mshv_ioctls::MshvError::from)?; let port_number = io_message.port_number; let rip = io_message.header.rip; let rax = io_message.rax; let instruction_length = io_message.header.instruction_length() as u64; - crate::debug!("mshv IO Details : \nPort : {}\n{:#?}", port_number, &self); - HyperlightExit::IoOut( - port_number, - rax.to_le_bytes().to_vec(), - rip, - instruction_length, - ) + + // mshv, unlike kvm, does not automatically increment RIP + self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RIP, + value: hv_register_value { + reg64: rip + instruction_length, + }, + ..Default::default() + }])?; + HyperlightExit::IoOut(port_number, rax.to_le_bytes().to_vec()) } UNMAPPED_GPA_MESSAGE => { let mimo_message = m.to_memory_info().map_err(mshv_ioctls::MshvError::from)?; let addr = mimo_message.guest_physical_address; - crate::debug!( - "mshv MMIO unmapped GPA -Details: Address: {} \n {:#?}", - addr, - &self - ); - HyperlightExit::Mmio(addr) + match MemoryRegionFlags::try_from(mimo_message)? 
{ + MemoryRegionFlags::READ => HyperlightExit::MmioRead(addr), + MemoryRegionFlags::WRITE => HyperlightExit::MmioWrite(addr), + _ => HyperlightExit::Unknown("Unknown MMIO access".to_string()), + } } INVALID_GPA_ACCESS_MESSAGE => { let mimo_message = m.to_memory_info().map_err(mshv_ioctls::MshvError::from)?; let gpa = mimo_message.guest_physical_address; let access_info = MemoryRegionFlags::try_from(mimo_message)?; - crate::debug!( - "mshv MMIO invalid GPA access -Details: Address: {} \n {:#?}", - gpa, - &self - ); - match get_memory_access_violation( - gpa as usize, - self.sandbox_regions.iter().chain(self.mmap_regions.iter()), - access_info, - ) { - Some(access_info_violation) => access_info_violation, - None => HyperlightExit::Mmio(gpa), + match access_info { + MemoryRegionFlags::READ => HyperlightExit::MmioRead(gpa), + MemoryRegionFlags::WRITE => HyperlightExit::MmioWrite(gpa), + _ => HyperlightExit::Unknown("Unknown MMIO access".to_string()), } } - // The only case an intercept exit is expected is when debugging is enabled - // and the intercepts are installed. - // Provide the extra information about the exception to accurately determine - // the stop reason #[cfg(gdb)] EXCEPTION_INTERCEPT => { - // Extract exception info from the message so we can figure out - // more information about the vCPU state - let ex_info = match m.to_exception_info().map_err(mshv_ioctls::MshvError::from) - { - Ok(info) => info, - Err(e) => { - log_then_return!("Error converting to exception info: {:?}", e); - } - }; - - match self.get_stop_reason(ex_info) { - Ok(reason) => HyperlightExit::Debug(reason), - Err(e) => { - log_then_return!("Error getting stop reason: {:?}", e); - } + use mshv_bindings::DebugRegisters; + + let ex_info = m + .to_exception_info() + .map_err(mshv_ioctls::MshvError::from)?; + let DebugRegisters { dr6, .. 
} = self.vcpu_fd.get_debug_regs()?; + HyperlightExit::Debug { + dr6, + exception: ex_info.exception_vector as u32, } } - other => { - crate::debug!("mshv Other Exit: Exit: {:#?} \n {:#?}", other, &self); - #[cfg(crashdump)] - let _ = crashdump::generate_crashdump(self); - log_then_return!("unknown Hyper-V run message type {:?}", other); - } + other => HyperlightExit::Unknown(format!("Unknown MSHV VCPU exit: {:?}", other)), }, Err(e) => match e.errno() { - // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR + // InterruptHandle::kill() sends a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => HyperlightExit::Cancelled(), libc::EAGAIN => HyperlightExit::Retry(), - _ => { - crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self); - log_then_return!("Error running VCPU {:?}", e); - } + _ => HyperlightExit::Unknown(format!("Unknown MSHV VCPU error: {}", e)), }, }; Ok(result) @@ -747,11 +620,6 @@ impl Hypervisor for HypervLinuxDriver { Ok(()) } - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor { - self as &mut dyn Hypervisor - } - fn interrupt_handle(&self) -> Arc { self.interrupt_handle.clone() } @@ -970,13 +838,6 @@ impl Drop for HypervLinuxDriver { #[instrument(skip_all, parent = Span::current(), level = "Trace")] fn drop(&mut self) { self.interrupt_handle.set_dropped(); - for region in self.sandbox_regions.iter().chain(self.mmap_regions.iter()) { - let mshv_region: mshv_user_mem_region = region.to_owned().into(); - match self.vm_fd.unmap_user_memory(mshv_region) { - Ok(_) => (), - Err(e) => error!("Failed to unmap user memory in HyperVOnLinux ({:?})", e), - } - } } } @@ -1036,7 +897,6 @@ mod tests { let config: SandboxConfiguration = Default::default(); super::HypervLinuxDriver::new( - regions.build(), entrypoint_ptr, rsp_ptr, pml4_ptr, diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs 
b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index d3bd95f88..f23736063 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -431,15 +431,7 @@ impl Hypervisor for HypervWindowsDriver { }; self.set_regs(®s)?; - let interrupt_handle = self.interrupt_handle.clone(); - VirtualCPU::run( - self.as_mut_hypervisor(), - interrupt_handle, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_hdl, - ) + Ok(()) } #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -476,15 +468,7 @@ impl Hypervisor for HypervWindowsDriver { // reset fpu state self.processor.set_fpu(&CommonFpu::default())?; - let interrupt_handle = self.interrupt_handle.clone(); - VirtualCPU::run( - self.as_mut_hypervisor(), - interrupt_handle, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_hdl, - ) + Ok(()) } #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -652,11 +636,6 @@ impl Hypervisor for HypervWindowsDriver { self.interrupt_handle.clear_cancel(); } - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor { - self as &mut dyn Hypervisor - } - #[cfg(crashdump)] fn crashdump_context(&self) -> Result> { if self.rt_cfg.guest_core_dump { diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index df0fd655b..51fc6b0e7 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -33,12 +33,12 @@ use super::gdb::{ DebugCommChannel, DebugMemoryAccess, DebugMsg, DebugResponse, GuestDebug, KvmDebug, VcpuStopReason, }; -use super::{HyperlightExit, Hypervisor, LinuxInterruptHandle, VirtualCPU}; +use super::{Hypervisor, LinuxInterruptHandle}; #[cfg(gdb)] use crate::HyperlightError; use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; -use crate::hypervisor::{InterruptHandle, InterruptHandleImpl, 
get_memory_access_violation}; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; +use crate::hypervisor::{HyperlightExit, InterruptHandle, InterruptHandleImpl}; +use crate::mem::memory_region::MemoryRegion; use crate::mem::mgr::SandboxMemoryManager; use crate::mem::ptr::{GuestPtr, RawPtr}; use crate::mem::shared_mem::HostSharedMemory; @@ -273,11 +273,6 @@ pub(crate) struct KVMDriver { orig_rsp: GuestPtr, interrupt_handle: Arc, - sandbox_regions: Vec, // Initially mapped regions when sandbox is created - mmap_regions: Vec<(MemoryRegion, u32)>, // Later mapped regions (region, slot number) - next_slot: u32, // Monotonically increasing slot number - freed_slots: Vec, // Reusable slots from unmapped regions - #[cfg(gdb)] debug: Option, #[cfg(gdb)] @@ -296,7 +291,6 @@ impl KVMDriver { // TODO: refactor this function to take fewer arguments. Add trace_info to rt_cfg #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn new( - mem_regions: Vec, pml4_addr: u64, entrypoint: u64, rsp: u64, @@ -309,12 +303,6 @@ impl KVMDriver { let vm_fd = kvm.create_vm_with_type(0)?; - mem_regions.iter().enumerate().try_for_each(|(i, region)| { - let mut kvm_region: kvm_userspace_memory_region = region.clone().into(); - kvm_region.slot = i as u32; - unsafe { vm_fd.set_user_memory_region(kvm_region) } - })?; - let vcpu_fd = vm_fd.create_vcpu(0)?; #[cfg(gdb)] @@ -358,10 +346,6 @@ impl KVMDriver { vcpu_fd, entrypoint, orig_rsp: rsp_gp, - next_slot: mem_regions.len() as u32, - sandbox_regions: mem_regions, - mmap_regions: Vec::new(), - freed_slots: Vec::new(), interrupt_handle: interrupt_handle.clone(), #[cfg(gdb)] debug, @@ -391,12 +375,6 @@ impl Debug for KVMDriver { let mut f = f.debug_struct("KVM Driver"); // Output each memory region - for region in &self.sandbox_regions { - f.field("Sandbox Memory Region", ®ion); - } - for region in &self.mmap_regions { - f.field("Mapped Memory Region", ®ion); - } let regs = self.vcpu_fd.get_regs(); // 
check that regs is OK and then set field in debug struct @@ -449,76 +427,25 @@ impl Hypervisor for KVMDriver { ..Default::default() }; self.set_regs(®s)?; - - let interrupt_handle = self.interrupt_handle.clone(); - - VirtualCPU::run( - self.as_mut_hypervisor(), - interrupt_handle, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - ) + Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - unsafe fn map_region(&mut self, region: &MemoryRegion) -> Result<()> { - if [ - region.guest_region.start, - region.guest_region.end, - region.host_region.start, - region.host_region.end, - ] - .iter() - .any(|x| x % self.page_size != 0) - { - log_then_return!( - "region is not page-aligned {:x}, {region:?}", - self.page_size - ); - } - - let mut kvm_region: kvm_userspace_memory_region = region.clone().into(); - - // Try to reuse a freed slot first, otherwise use next_slot - let slot = if let Some(freed_slot) = self.freed_slots.pop() { - freed_slot - } else { - let slot = self.next_slot; - self.next_slot += 1; - slot - }; - + unsafe fn map_memory(&mut self, (slot, region): (u32, &MemoryRegion)) -> Result<()> { + let mut kvm_region: kvm_userspace_memory_region = region.into(); kvm_region.slot = slot; - unsafe { self.vm_fd.set_user_memory_region(kvm_region) }?; - self.mmap_regions.push((region.to_owned(), slot)); + unsafe { self.vm_fd.set_user_memory_region(kvm_region)? }; Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - unsafe fn unmap_region(&mut self, region: &MemoryRegion) -> Result<()> { - if let Some(idx) = self.mmap_regions.iter().position(|(r, _)| r == region) { - let (region, slot) = self.mmap_regions.remove(idx); - let mut kvm_region: kvm_userspace_memory_region = region.into(); - kvm_region.slot = slot; - // Setting memory_size to 0 unmaps the slot's region - // From https://docs.kernel.org/virt/kvm/api.html - // > Deleting a slot is done by passing zero for memory_size. 
- kvm_region.memory_size = 0; - unsafe { self.vm_fd.set_user_memory_region(kvm_region) }?; - - // Add the freed slot to the reuse list - self.freed_slots.push(slot); - - Ok(()) - } else { - Err(new_error!("Tried to unmap region that is not mapped")) - } - } - - fn get_mapped_regions(&self) -> Box + '_> { - Box::new(self.mmap_regions.iter().map(|(region, _)| region)) + fn unmap_memory(&mut self, (slot, region): (u32, &MemoryRegion)) -> Result<()> { + let mut kvm_region: kvm_userspace_memory_region = region.into(); + kvm_region.slot = slot; + // Setting memory_size to 0 unmaps the slot's region + // From https://docs.kernel.org/virt/kvm/api.html + // > Deleting a slot is done by passing zero for memory_size. + kvm_region.memory_size = 0; + unsafe { self.vm_fd.set_user_memory_region(kvm_region) }?; + Ok(()) } #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -540,131 +467,34 @@ impl Hypervisor for KVMDriver { // reset fpu state self.set_fpu(&CommonFpu::default())?; - let interrupt_handle = self.interrupt_handle.clone(); - - // run - VirtualCPU::run( - self.as_mut_hypervisor(), - interrupt_handle, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - )?; - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn handle_io( - &mut self, - port: u16, - data: Vec, - _rip: u64, - _instruction_length: u64, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - ) -> Result<()> { - // KVM does not need RIP or instruction length, as it automatically sets the RIP - - // The payload param for the outb_handle_fn is the first byte - // of the data array cast to an u64. 
Thus, we need to make sure - // the data array has at least one u8, then convert that to an u64 - if data.is_empty() { - log_then_return!("no data was given in IO interrupt"); - } else { - let mut padded = [0u8; 4]; - let copy_len = data.len().min(4); - padded[..copy_len].copy_from_slice(&data[..copy_len]); - let value = u32::from_le_bytes(padded); - - #[cfg(feature = "mem_profile")] - { - let regs = self.regs()?; - let trace_info = self.trace_info_mut(); - handle_outb(mem_mgr, host_funcs, port, value, ®s, trace_info)?; - } - #[cfg(not(feature = "mem_profile"))] - { - handle_outb(mem_mgr, host_funcs, port, value)?; - } - } - Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn run( - &mut self, - #[cfg(feature = "trace_guest")] tc: &mut crate::sandbox::trace::TraceContext, - ) -> Result { - #[cfg(feature = "trace_guest")] - tc.setup_guest_trace(Span::current().context()); - - let exit_reason = self.vcpu_fd.run(); - let result = match exit_reason { - Ok(VcpuExit::Hlt) => { - crate::debug!("KVM - Halt Details : {:#?}", &self); - HyperlightExit::Halt() - } - Ok(VcpuExit::IoOut(port, data)) => { - // because vcpufd.run() mutably borrows self we cannot pass self to crate::debug! 
macro here - crate::debug!("KVM IO Details : \nPort : {}\nData : {:?}", port, data); - // KVM does not need to set RIP or instruction length so these are set to 0 - HyperlightExit::IoOut(port, data.to_vec(), 0, 0) - } - Ok(VcpuExit::MmioRead(addr, _)) => { - crate::debug!("KVM MMIO Read -Details: Address: {} \n {:#?}", addr, &self); - - match get_memory_access_violation( - addr as usize, - self.sandbox_regions - .iter() - .chain(self.mmap_regions.iter().map(|(r, _)| r)), - MemoryRegionFlags::READ, - ) { - Some(access_violation_exit) => access_violation_exit, - None => HyperlightExit::Mmio(addr), - } - } - Ok(VcpuExit::MmioWrite(addr, _)) => { - crate::debug!("KVM MMIO Write -Details: Address: {} \n {:#?}", addr, &self); - - match get_memory_access_violation( - addr as usize, - self.sandbox_regions - .iter() - .chain(self.mmap_regions.iter().map(|(r, _)| r)), - MemoryRegionFlags::WRITE, - ) { - Some(access_violation_exit) => access_violation_exit, - None => HyperlightExit::Mmio(addr), - } - } + fn run_vcpu(&mut self) -> Result { + match self.vcpu_fd.run() { + Ok(VcpuExit::Hlt) => Ok(HyperlightExit::Halt()), + Ok(VcpuExit::IoOut(port, data)) => Ok(HyperlightExit::IoOut(port, data.to_vec())), + Ok(VcpuExit::MmioRead(addr, _)) => Ok(HyperlightExit::MmioRead(addr)), + Ok(VcpuExit::MmioWrite(addr, _)) => Ok(HyperlightExit::MmioWrite(addr)), #[cfg(gdb)] - // KVM provides architecture specific information about the vCPU state when exiting - Ok(VcpuExit::Debug(debug_exit)) => match self.get_stop_reason(debug_exit) { - Ok(reason) => HyperlightExit::Debug(reason), - Err(e) => { - log_then_return!("Error getting stop reason: {:?}", e); - } - }, + Ok(VcpuExit::Debug(debug_exit)) => Ok(HyperlightExit::Debug { + dr6: debug_exit.dr6, + exception: debug_exit.exception, + }), Err(e) => match e.errno() { - // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR - libc::EINTR => HyperlightExit::Cancelled(), - libc::EAGAIN => HyperlightExit::Retry(), - _ => { - 
crate::debug!("KVM Error -Details: Address: {} \n {:#?}", e, &self); - log_then_return!("Error running VCPU {:?}", e); - } + // InterruptHandle::kill() sends a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR + libc::EINTR => Ok(HyperlightExit::Cancelled()), + libc::EAGAIN => Ok(HyperlightExit::Retry()), + _ => Ok(HyperlightExit::Unknown(format!( + "Unknown KVM VCPU error: {}", + e + ))), }, - Ok(other) => { - let err_msg = format!("Unexpected KVM Exit {:?}", other); - crate::debug!("KVM Other Exit Details: {:#?}", &self); - HyperlightExit::Unknown(err_msg) - } - }; - Ok(result) + Ok(other) => Ok(HyperlightExit::Unknown(format!( + "Unknown KVM VCPU exit: {:?}", + other + ))), + } } fn regs(&self) -> Result { @@ -700,11 +530,6 @@ impl Hypervisor for KVMDriver { Ok(()) } - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor { - self as &mut dyn Hypervisor - } - fn interrupt_handle(&self) -> Arc { self.interrupt_handle.clone() } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 55ed2f465..cc80610d0 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -14,17 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -use log::{LevelFilter, debug}; -use tracing::{Span, instrument}; +use log::LevelFilter; -use crate::HyperlightError::StackOverflow; -use crate::error::HyperlightError::ExecutionCanceledByHost; +use crate::Result; use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; #[cfg(feature = "mem_profile")] use crate::sandbox::trace::MemTraceInfo; -use crate::{HyperlightError, Result, log_then_return}; /// HyperV-on-linux functionality #[cfg(mshv3)] @@ -82,23 +78,27 @@ use crate::sandbox::host_funcs::FunctionRegistry; /// the hypervisor specific exit reasons to these generic ones pub enum HyperlightExit { #[cfg(gdb)] - /// The vCPU has exited due to a debug event - Debug(VcpuStopReason), + Debug { dr6: u64, exception: u32 }, /// The vCPU has halted Halt(), /// The vCPU has issued a write to the given port with the given value - IoOut(u16, Vec, u64, u64), - /// The vCPU has attempted to read or write from an unmapped address - Mmio(u64), - /// The vCPU tried to access memory but was missing the required permissions - AccessViolation(u64, MemoryRegionFlags, MemoryRegionFlags), + IoOut(u16, Vec), + /// The vCPU tried to read from the given (unmapped) addr + MmioRead(u64), + /// The vCPU tried to write to the given (unmapped) addr + MmioWrite(u64), /// The vCPU execution has been cancelled Cancelled(), /// The vCPU has exited for a reason that is not handled by Hyperlight Unknown(String), - /// The operation should be retried - /// On Linux this can happen where a call to run the CPU can return EAGAIN - /// On Windows the platform could cause a cancelation of the VM run + /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN + #[cfg_attr( + target_os = 
"windows", + expect( + dead_code, + reason = "Retry() is never constructed on Windows, but it is still matched on (which dead_code lint ignores)" + ) + )] Retry(), } @@ -118,19 +118,17 @@ pub(crate) trait Hypervisor: Debug + Send { #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> Result<()>; - /// Map a region of host memory into the sandbox. + /// Map memory region into this VM /// - /// Depending on the host platform, there are likely alignment - /// requirements of at least one page for base and len. - unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()>; + /// # Safety + /// The caller must ensure that the memory region is valid and points to valid memory, + /// and lives long enough for the VM to use it. + /// The caller must ensure that the given u32 is not already mapped, otherwise previously mapped + /// memory regions may be overwritten. + unsafe fn map_memory(&mut self, region: (u32, &MemoryRegion)) -> Result<()>; - /// Unmap a memory region from the sandbox - unsafe fn unmap_region(&mut self, rgn: &MemoryRegion) -> Result<()>; - - /// Get the currently mapped dynamic memory regions (not including sandbox regions) - /// - /// Note: Box needed for trait to be object-safe :( - fn get_mapped_regions(&self) -> Box + '_>; + /// Unmap memory region from this VM that has previously been mapped using `map_memory`. + fn unmap_memory(&mut self, region: (u32, &MemoryRegion)) -> Result<()>; /// Dispatch a call from the host to the guest using the given pointer /// to the dispatch function _in the guest's address space_. @@ -147,22 +145,9 @@ pub(crate) trait Hypervisor: Debug + Send { #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> Result<()>; - /// Handle an IO exit from the internally stored vCPU. 
- fn handle_io( - &mut self, - port: u16, - data: Vec, - rip: u64, - instruction_length: u64, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - ) -> Result<()>; - - /// Run the vCPU - fn run( - &mut self, - #[cfg(feature = "trace_guest")] tc: &mut crate::sandbox::trace::TraceContext, - ) -> Result; + /// Runs the vCPU until it exits. + /// Note: this function should not emit any traces or spans as it is called after guest span is setup + fn run_vcpu(&mut self) -> Result; /// Get InterruptHandle to underlying VM (returns internal trait) fn interrupt_handle(&self) -> Arc; @@ -239,9 +224,6 @@ pub(crate) trait Hypervisor: Debug + Send { LevelFilter::from_str(level).unwrap_or(LevelFilter::Error) as u32 } - /// get a mutable trait object from self - fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor; - #[cfg(crashdump)] fn crashdump_context(&self) -> Result>; @@ -260,205 +242,32 @@ pub(crate) trait Hypervisor: Debug + Send { fn trace_info_mut(&mut self) -> &mut MemTraceInfo; } -/// Returns a Some(HyperlightExit::AccessViolation(..)) if the given gpa doesn't have -/// access its corresponding region. Returns None otherwise, or if the region is not found. -pub(crate) fn get_memory_access_violation<'a>( +/// The vCPU tried to access the given addr +enum MemoryAccess { + /// The accessed region has the given flags + AccessViolation(MemoryRegionFlags), + /// The accessed region is a stack guard page + StackGuardPageViolation, +} + +/// Determines if a known memory access violation occurred at the given address with the given action type. +/// Returns Some(reason) if violation reason could be determined, or None if violation occurred but in unmapped region. 
+fn get_memory_access_violation<'a>( gpa: usize, + tried: MemoryRegionFlags, mut mem_regions: impl Iterator, - access_info: MemoryRegionFlags, -) -> Option { +) -> Option { // find the region containing the given gpa let region = mem_regions.find(|region| region.guest_region.contains(&gpa)); - if let Some(region) = region - && (!region.flags.contains(access_info) - || region.flags.contains(MemoryRegionFlags::STACK_GUARD)) - { - return Some(HyperlightExit::AccessViolation( - gpa as u64, - access_info, - region.flags, - )); - } - None -} - -/// A virtual CPU that can be run until an exit occurs -pub struct VirtualCPU {} - -impl VirtualCPU { - /// Run the given hypervisor until a halt instruction is reached - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn run( - hv: &mut dyn Hypervisor, - interrupt_handle: Arc, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()> { - // Keeps the trace context and open spans - #[cfg(feature = "trace_guest")] - let mut tc = crate::sandbox::trace::TraceContext::new(); - - loop { - // ===== KILL() TIMING POINT 2: Before set_running() ===== - // If kill() is called and ran to completion BEFORE this line executes: - // - CANCEL_BIT will be set and we will return an early VmExit::Cancelled() - // without sending any signals/WHV api calls - #[cfg(any(kvm, mshv3))] - interrupt_handle.set_tid(); - interrupt_handle.set_running(); - // NOTE: `set_running()`` must be called before checking `is_cancelled()` - // otherwise we risk missing a call to `kill()` because the vcpu would not be marked as running yet so signals won't be sent - - let exit_reason = { - if interrupt_handle.is_cancelled() || interrupt_handle.is_debug_interrupted() { - Ok(HyperlightExit::Cancelled()) - } else { - // ==== KILL() TIMING POINT 3: Before calling run() ==== - // If kill() is called and ran to completion BEFORE this line executes: - // - Will still do a VM 
entry, but signals will be sent until VM exits - #[cfg(feature = "trace_guest")] - let result = hv.run(&mut tc); - #[cfg(not(feature = "trace_guest"))] - let result = hv.run(); - - // End current host trace by closing the current span that captures traces - // happening when a guest exits and re-enters. - #[cfg(feature = "trace_guest")] - tc.end_host_trace(); - - // Handle the guest trace data if any - #[cfg(feature = "trace_guest")] - { - let regs = hv.regs()?; - if let Err(e) = tc.handle_trace(®s, mem_mgr) { - // If no trace data is available, we just log a message and continue - // Is this the right thing to do? - log::debug!("Error handling guest trace: {:?}", e); - } - } - - result - } - }; - - // ===== KILL() TIMING POINT 4: Before clear_running() ===== - // If kill() is called and ran to completion BEFORE this line executes: - // - CANCEL_BIT will be set. Cancellation is deferred to the next iteration. - // - Signals will be sent until `clear_running()` is called, which is ok - interrupt_handle.clear_running(); - - // ===== KILL() TIMING POINT 5: Before capturing cancel_requested ===== - // If kill() is called and ran to completion BEFORE this line executes: - // - CANCEL_BIT will be set. Cancellation is deferred to the next iteration. - // - Signals will not be sent - let cancel_requested = interrupt_handle.is_cancelled(); - let debug_interrupted = interrupt_handle.is_debug_interrupted(); - - // ===== KILL() TIMING POINT 6: Before checking exit_reason ===== - // If kill() is called and ran to completion BEFORE this line executes: - // - CANCEL_BIT will be set. Cancellation is deferred to the next iteration. 
- // - Signals will not be sent - match exit_reason { - #[cfg(gdb)] - Ok(HyperlightExit::Debug(stop_reason)) => { - if let Err(e) = hv.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { - log_then_return!(e); - } - } - - Ok(HyperlightExit::Halt()) => { - break; - } - Ok(HyperlightExit::IoOut(port, data, rip, instruction_length)) => { - hv.handle_io(port, data, rip, instruction_length, mem_mgr, host_funcs)? - } - Ok(HyperlightExit::Mmio(addr)) => { - #[cfg(crashdump)] - crashdump::generate_crashdump(hv)?; - - if !mem_mgr.check_stack_guard()? { - log_then_return!(StackOverflow()); - } - - log_then_return!("MMIO access address {:#x}", addr); - } - Ok(HyperlightExit::AccessViolation(addr, tried, region_permission)) => { - #[cfg(crashdump)] - crashdump::generate_crashdump(hv)?; - - // If GDB is enabled, we handle the debug memory access - // Disregard return value as we want to return the error - #[cfg(gdb)] - let _ = hv.handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Crash); - - if region_permission.intersects(MemoryRegionFlags::STACK_GUARD) { - return Err(HyperlightError::StackOverflow()); - } - log_then_return!(HyperlightError::MemoryAccessViolation( - addr, - tried, - region_permission - )); - } - Ok(HyperlightExit::Cancelled()) => { - // If cancellation was not requested for this specific guest function call, - // the vcpu was interrupted by a stale cancellation. 
This can occur when: - // - Linux: A signal from a previous call arrives late - // - Windows: WHvCancelRunVirtualProcessor called right after vcpu exits but RUNNING_BIT is still true - if !cancel_requested && !debug_interrupted { - // Track that an erroneous vCPU kick occurred - metrics::counter!(METRIC_ERRONEOUS_VCPU_KICKS).increment(1); - // treat this the same as a HyperlightExit::Retry, the cancel was not meant for this call - continue; - } - - // If the vcpu was interrupted by a debugger, we need to handle it - #[cfg(gdb)] - { - interrupt_handle.clear_debug_interrupt(); - if let Err(e) = - hv.handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Interrupt) - { - log_then_return!(e); - } - } - - // Shutdown is returned when the host has cancelled execution - // After termination, the main thread will re-initialize the VM - metrics::counter!(METRIC_GUEST_CANCELLATION).increment(1); - log_then_return!(ExecutionCanceledByHost()); - } - Ok(HyperlightExit::Unknown(reason)) => { - #[cfg(crashdump)] - crashdump::generate_crashdump(hv)?; - // If GDB is enabled, we handle the debug memory access - // Disregard return value as we want to return the error - #[cfg(gdb)] - let _ = hv.handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Crash); - - log_then_return!("Unexpected VM Exit {:?}", reason); - } - Ok(HyperlightExit::Retry()) => { - debug!("[VCPU] Retry - continuing VM run loop"); - continue; - } - Err(e) => { - #[cfg(crashdump)] - crashdump::generate_crashdump(hv)?; - // If GDB is enabled, we handle the debug memory access - // Disregard return value as we want to return the error - #[cfg(gdb)] - let _ = hv.handle_debug(dbg_mem_access_fn.clone(), VcpuStopReason::Crash); - - return Err(e); - } - } + if let Some(region) = region { + if region.region_type == MemoryRegionType::GuardPage { + return Some(MemoryAccess::StackGuardPageViolation); + } else if !region.flags.contains(tried) { + return Some(MemoryAccess::AccessViolation(region.flags)); } - - Ok(()) } + None 
} /// A trait for platform-specific interrupt handle implementation details diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index bc3c9d9b6..2b801329b 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -248,8 +248,8 @@ impl MemoryRegionVecBuilder { } #[cfg(mshv3)] -impl From for mshv_user_mem_region { - fn from(region: MemoryRegion) -> Self { +impl From<&MemoryRegion> for mshv_user_mem_region { + fn from(region: &MemoryRegion) -> Self { let size = (region.guest_region.end - region.guest_region.start) as u64; let guest_pfn = region.guest_region.start as u64 >> PAGE_SHIFT; let userspace_addr = region.host_region.start as u64; @@ -276,8 +276,8 @@ impl From for mshv_user_mem_region { } #[cfg(kvm)] -impl From for kvm_bindings::kvm_userspace_memory_region { - fn from(region: MemoryRegion) -> Self { +impl From<&MemoryRegion> for kvm_bindings::kvm_userspace_memory_region { + fn from(region: &MemoryRegion) -> Self { let perm_flags = MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE; diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index afb065047..232c7ae9f 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -278,10 +278,12 @@ impl MultiUseSandbox { let regions_to_map = snapshot_regions.difference(¤t_regions); for region in regions_to_unmap { - unsafe { self.vm.unmap_region(region)? }; + self.vm.unmap_region(region)?; } for region in regions_to_map { + // Safety: The region has been mapped before, and at that point the caller promised that the memory region is valid + // in their call to `MultiUseSandbox::map_region` unsafe { self.vm.map_region(region)? }; } @@ -1224,7 +1226,7 @@ mod tests { // 1. 
Take snapshot 1 with no additional regions mapped let snapshot1 = sbox.snapshot().unwrap(); - assert_eq!(sbox.vm.get_mapped_regions().len(), 0); + assert_eq!(sbox.vm.get_mapped_regions().count(), 0); // 2. Map a memory region let map_mem = allocate_guest_memory(); @@ -1232,19 +1234,19 @@ mod tests { let region = region_for_memory(&map_mem, guest_base, MemoryRegionFlags::READ); unsafe { sbox.map_region(®ion).unwrap() }; - assert_eq!(sbox.vm.get_mapped_regions().len(), 1); + assert_eq!(sbox.vm.get_mapped_regions().count(), 1); // 3. Take snapshot 2 with 1 region mapped let snapshot2 = sbox.snapshot().unwrap(); - assert_eq!(sbox.vm.get_mapped_regions().len(), 1); + assert_eq!(sbox.vm.get_mapped_regions().count(), 1); // 4. Restore to snapshot 1 (should unmap the region) sbox.restore(&snapshot1).unwrap(); - assert_eq!(sbox.vm.get_mapped_regions().len(), 0); + assert_eq!(sbox.vm.get_mapped_regions().count(), 0); // 5. Restore forward to snapshot 2 (should remap the region) sbox.restore(&snapshot2).unwrap(); - assert_eq!(sbox.vm.get_mapped_regions().len(), 1); + assert_eq!(sbox.vm.get_mapped_regions().count(), 1); // Verify the region is the same let mut restored_regions = sbox.vm.get_mapped_regions(); diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index a5be963a3..86611af05 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -172,7 +172,6 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(mshv3)] Some(HypervisorType::Mshv) => { let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new( - regions, entrypoint_ptr, rsp_ptr, pml4_ptr, @@ -190,7 +189,6 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(kvm)] Some(HypervisorType::Kvm) => { let hv = crate::hypervisor::kvm::KVMDriver::new( - regions, pml4_ptr.absolute()?, entrypoint_ptr.absolute()?, rsp_ptr.absolute()?, @@ -234,7 +232,7 @@ pub(crate) fn 
set_up_hypervisor_partition( } }; - Ok(HyperlightVm::new(vm)) + HyperlightVm::new(vm, regions, config) } #[cfg(test)] From b7675437a8646d355e85992ed509d9d365d8cc4d Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:56:38 -0800 Subject: [PATCH 3/4] Move debugging/gdb to HyperlightVm. Unify Windows files. Introduce Vm trait Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/crashdump.rs | 4 +- .../src/hypervisor/gdb/arch.rs | 43 +- .../src/hypervisor/gdb/hyperv_debug.rs | 236 --- .../src/hypervisor/gdb/kvm_debug.rs | 253 ---- src/hyperlight_host/src/hypervisor/gdb/mod.rs | 192 +-- .../src/hypervisor/gdb/mshv_debug.rs | 237 --- .../src/hypervisor/hyperlight_vm.rs | 772 +++++++++- .../src/hypervisor/hyperv_linux.rs | 876 ++--------- .../src/hypervisor/hyperv_windows.rs | 1308 ++++++++--------- src/hyperlight_host/src/hypervisor/kvm.rs | 742 ++-------- src/hyperlight_host/src/hypervisor/mod.rs | 212 +-- .../hypervisor/windows_hypervisor_platform.rs | 586 -------- .../src/hypervisor/wrappers.rs | 12 - src/hyperlight_host/src/sandbox/hypervisor.rs | 4 +- .../src/sandbox/initialized_multi_use.rs | 6 +- src/hyperlight_host/src/sandbox/mod.rs | 3 - .../src/sandbox/uninitialized_evolve.rs | 91 +- 17 files changed, 1674 insertions(+), 3903 deletions(-) delete mode 100644 src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs delete mode 100644 src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs delete mode 100644 src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs delete mode 100644 src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs diff --git a/src/hyperlight_host/src/hypervisor/crashdump.rs b/src/hyperlight_host/src/hypervisor/crashdump.rs index 86dce75ff..a5f7c19fc 100644 --- a/src/hyperlight_host/src/hypervisor/crashdump.rs +++ b/src/hyperlight_host/src/hypervisor/crashdump.rs @@ -23,7 +23,7 @@ use elfcore::{ ReadProcessMemory, ThreadView, 
VaProtection, VaRegion, }; -use super::Hypervisor; +use crate::hypervisor::hyperlight_vm::HyperlightVm; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::{Result, new_error}; @@ -262,7 +262,7 @@ impl ReadProcessMemory for GuestMemReader { /// /// # Returns /// * `Result<()>`: Success or error -pub(crate) fn generate_crashdump(hv: &dyn Hypervisor) -> Result<()> { +pub(crate) fn generate_crashdump(hv: &HyperlightVm) -> Result<()> { // Get crash context from hypervisor let ctx = hv .crashdump_context() diff --git a/src/hyperlight_host/src/hypervisor/gdb/arch.rs b/src/hyperlight_host/src/hypervisor/gdb/arch.rs index e75eade74..05f221e46 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/arch.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/arch.rs @@ -16,16 +16,16 @@ limitations under the License. //! This file contains architecture specific code for the x86_64 -use std::collections::HashMap; - -use super::VcpuStopReason; +use super::{DebuggableVm, VcpuStopReason}; +use crate::Result; +use crate::hypervisor::regs::CommonRegisters; // Described in Table 6-1. Exceptions and Interrupts at Page 6-13 Vol. 
1 // of Intel 64 and IA-32 Architectures Software Developer's Manual /// Exception id for #DB -const DB_EX_ID: u32 = 1; +pub(crate) const DB_EX_ID: u32 = 1; /// Exception id for #BP - triggered by the INT3 instruction -const BP_EX_ID: u32 = 3; +pub(crate) const BP_EX_ID: u32 = 3; /// Software Breakpoint size in memory pub(crate) const SW_BP_SIZE: usize = 1; @@ -51,61 +51,52 @@ pub(crate) const DR6_HW_BP_FLAGS_MASK: u64 = 0x0F << DR6_HW_BP_FLAGS_POS; /// Determine the reason the vCPU stopped /// This is done by checking the DR6 register and the exception id -/// NOTE: Additional checks are done for the entrypoint, stored hw_breakpoints -/// and sw_breakpoints to ensure the stop reason is valid with internal state pub(crate) fn vcpu_stop_reason( - single_step: bool, - rip: u64, + vm: &mut dyn DebuggableVm, dr6: u64, entrypoint: u64, exception: u32, - hw_breakpoints: &[u64], - sw_breakpoints: &HashMap, -) -> VcpuStopReason { +) -> Result { + let CommonRegisters { rip, .. } = vm.regs()?; if DB_EX_ID == exception { // If the BS flag in DR6 register is set, it means a single step // instruction triggered the exit // Check page 19-4 Vol. 3B of Intel 64 and IA-32 // Architectures Software Developer's Manual - if dr6 & DR6_BS_FLAG_MASK != 0 && single_step { - return VcpuStopReason::DoneStep; + if dr6 & DR6_BS_FLAG_MASK != 0 { + return Ok(VcpuStopReason::DoneStep); } // If any of the B0-B3 flags in DR6 register is set, it means a // hardware breakpoint triggered the exit // Check page 19-4 Vol. 
3B of Intel 64 and IA-32 // Architectures Software Developer's Manual - if DR6_HW_BP_FLAGS_MASK & dr6 != 0 && hw_breakpoints.contains(&rip) { + if DR6_HW_BP_FLAGS_MASK & dr6 != 0 { if rip == entrypoint { - return VcpuStopReason::EntryPointBp; + vm.remove_hw_breakpoint(entrypoint)?; + return Ok(VcpuStopReason::EntryPointBp); } - return VcpuStopReason::HwBp; + return Ok(VcpuStopReason::HwBp); } } - if BP_EX_ID == exception && sw_breakpoints.contains_key(&rip) { - return VcpuStopReason::SwBp; + if BP_EX_ID == exception { + return Ok(VcpuStopReason::SwBp); } // Log an error and provide internal debugging info log::error!( r"The vCPU exited because of an unknown reason: - single_step: {:?} rip: {:?} dr6: {:?} entrypoint: {:?} exception: {:?} - hw_breakpoints: {:?} - sw_breakpoints: {:?} ", - single_step, rip, dr6, entrypoint, exception, - hw_breakpoints, - sw_breakpoints, ); - VcpuStopReason::Unknown + Ok(VcpuStopReason::Unknown) } diff --git a/src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs b/src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs deleted file mode 100644 index f5b332d02..000000000 --- a/src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs +++ /dev/null @@ -1,236 +0,0 @@ -/* -Copyright 2024 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -use std::collections::HashMap; - -use windows::Win32::System::Hypervisor::WHV_VP_EXCEPTION_CONTEXT; - -use super::arch::{MAX_NO_OF_HW_BP, vcpu_stop_reason}; -use super::{GuestDebug, SW_BP_SIZE, VcpuStopReason}; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; -use crate::hypervisor::windows_hypervisor_platform::VMProcessor; -use crate::hypervisor::wrappers::WHvDebugRegisters; -use crate::{HyperlightError, Result, new_error}; - -/// KVM Debug struct -/// This struct is used to abstract the internal details of the kvm -/// guest debugging settings -#[derive(Default)] -pub(crate) struct HypervDebug { - /// vCPU stepping state - single_step: bool, - - /// Array of addresses for HW breakpoints - hw_breakpoints: Vec, - /// Saves the bytes modified to enable SW breakpoints - sw_breakpoints: HashMap, - - /// Debug registers - dbg_cfg: WHvDebugRegisters, -} - -impl HypervDebug { - pub(crate) fn new() -> Self { - Self { - single_step: false, - hw_breakpoints: vec![], - sw_breakpoints: HashMap::new(), - dbg_cfg: WHvDebugRegisters::default(), - } - } - - /// Returns the instruction pointer from the stopped vCPU - fn get_instruction_pointer(&self, vcpu_fd: &VMProcessor) -> Result { - let regs = vcpu_fd - .regs() - .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; - - Ok(regs.rip) - } - - /// This method sets the kvm debugreg fields to enable breakpoints at - /// specific addresses - /// - /// The first 4 debug registers are used to set the addresses - /// The 4th and 5th debug registers are obsolete and not used - /// The 7th debug register is used to enable the breakpoints - /// For more information see: DEBUG REGISTERS chapter in the architecture - /// manual - fn set_debug_config(&mut self, vcpu_fd: &VMProcessor, step: bool) -> Result<()> { - let addrs = &self.hw_breakpoints; - - let mut dbg_cfg = WHvDebugRegisters::default(); - - for (k, addr) in addrs.iter().enumerate() { - match k { - 0 => { - dbg_cfg.dr0 = *addr; - } - 1 => { 
- dbg_cfg.dr1 = *addr; - } - 2 => { - dbg_cfg.dr2 = *addr; - } - 3 => { - dbg_cfg.dr3 = *addr; - } - _ => { - Err(new_error!("Tried to set more than 4 HW breakpoints"))?; - } - } - dbg_cfg.dr7 |= 1 << (k * 2); - } - - self.dbg_cfg = dbg_cfg; - - vcpu_fd - .set_debug_regs(&self.dbg_cfg) - .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?; - - self.single_step = step; - - let mut regs = vcpu_fd - .regs() - .map_err(|e| new_error!("Could not get registers: {:?}", e))?; - - // Set TF Flag to enable Traps - if self.single_step { - regs.rflags |= 1 << 8; // Set the TF flag - } else { - regs.rflags &= !(1 << 8); // Clear the TF flag - } - - vcpu_fd - .set_regs(&regs) - .map_err(|e| new_error!("Could not set guest registers: {:?}", e))?; - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - vcpu_fd: &VMProcessor, - exception: WHV_VP_EXCEPTION_CONTEXT, - entrypoint: u64, - ) -> Result { - let rip = self.get_instruction_pointer(vcpu_fd)?; - let rip = self.translate_gva(vcpu_fd, rip)?; - - let debug_regs = vcpu_fd - .get_debug_regs() - .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; - - // Check if the vCPU stopped because of a hardware breakpoint - let reason = vcpu_stop_reason( - self.single_step, - rip, - debug_regs.dr6, - entrypoint, - exception.ExceptionType as u32, - &self.hw_breakpoints, - &self.sw_breakpoints, - ); - - if let VcpuStopReason::EntryPointBp = reason { - // In case the hw breakpoint is the entry point, remove it to - // avoid hanging here as gdb does not remove breakpoints it - // has not set. - // Gdb expects the target to be stopped when connected. 
- self.remove_hw_breakpoint(vcpu_fd, entrypoint)?; - } - - Ok(reason) - } -} - -impl GuestDebug for HypervDebug { - type Vcpu = VMProcessor; - - fn is_hw_breakpoint(&self, addr: &u64) -> bool { - self.hw_breakpoints.contains(addr) - } - fn is_sw_breakpoint(&self, addr: &u64) -> bool { - self.sw_breakpoints.contains_key(addr) - } - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool { - if self.hw_breakpoints.len() >= MAX_NO_OF_HW_BP { - false - } else { - self.hw_breakpoints.push(*addr); - - true - } - } - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]) { - _ = self.sw_breakpoints.insert(addr, data); - } - fn delete_hw_breakpoint(&mut self, addr: &u64) { - self.hw_breakpoints.retain(|&a| a != *addr); - } - fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]> { - self.sw_breakpoints.remove(addr) - } - - fn read_regs(&self, vcpu_fd: &Self::Vcpu) -> Result<(CommonRegisters, CommonFpu)> { - log::debug!("Read registers"); - let regs = vcpu_fd - .regs() - .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?; - - let fpu = vcpu_fd - .fpu() - .map_err(|e| new_error!("Could not read guest FPU registers: {:?}", e))?; - - Ok((regs, fpu)) - } - - fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> Result<()> { - self.set_debug_config(vcpu_fd, enable) - } - - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> Result { - vcpu_fd - .translate_gva(gva) - .map_err(|_| HyperlightError::TranslateGuestAddress(gva)) - } - - fn write_regs( - &self, - vcpu_fd: &Self::Vcpu, - regs: &CommonRegisters, - fpu: &CommonFpu, - ) -> Result<()> { - log::debug!("Write registers"); - - vcpu_fd - .set_regs(regs) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e))?; - - // Only xmm and mxcsr is piped though in the given fpu, so only set those - let mut current_fpu: CommonFpu = vcpu_fd - .fpu() - .map_err(|e| new_error!("Could not read guest FPU registers: {:?}", e))?; - current_fpu.mxcsr = fpu.mxcsr; - 
current_fpu.xmm = fpu.xmm; - - vcpu_fd - .set_fpu(&current_fpu) - .map_err(|e| new_error!("Could not write guest FPU registers: {:?}", e))?; - Ok(()) - } -} diff --git a/src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs b/src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs deleted file mode 100644 index ae5996d49..000000000 --- a/src/hyperlight_host/src/hypervisor/gdb/kvm_debug.rs +++ /dev/null @@ -1,253 +0,0 @@ -/* -Copyright 2025 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -use std::collections::HashMap; - -use kvm_bindings::{ - KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_GUESTDBG_USE_HW_BP, KVM_GUESTDBG_USE_SW_BP, - kvm_debug_exit_arch, kvm_guest_debug, -}; -use kvm_ioctls::VcpuFd; - -use super::arch::{MAX_NO_OF_HW_BP, SW_BP_SIZE, vcpu_stop_reason}; -use super::{GuestDebug, VcpuStopReason}; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; -use crate::{HyperlightError, Result, new_error}; - -/// KVM Debug struct -/// This struct is used to abstract the internal details of the kvm -/// guest debugging settings -#[derive(Default)] -pub(crate) struct KvmDebug { - /// vCPU stepping state - single_step: bool, - - /// Array of addresses for HW breakpoints - hw_breakpoints: Vec, - /// Saves the bytes modified to enable SW breakpoints - sw_breakpoints: HashMap, - - /// Sent to KVM for enabling guest debug - dbg_cfg: kvm_guest_debug, -} - -impl KvmDebug { - pub(crate) fn new() -> Self { - let dbg = kvm_guest_debug { - control: KVM_GUESTDBG_ENABLE | 
KVM_GUESTDBG_USE_SW_BP, - ..Default::default() - }; - - Self { - single_step: false, - hw_breakpoints: vec![], - sw_breakpoints: HashMap::new(), - dbg_cfg: dbg, - } - } - - /// Returns the instruction pointer from the stopped vCPU - fn get_instruction_pointer(&self, vcpu_fd: &VcpuFd) -> Result { - let regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; - - Ok(regs.rip) - } - - /// This method sets the kvm debugreg fields to enable breakpoints at - /// specific addresses - /// - /// The first 4 debug registers are used to set the addresses - /// The 4th and 5th debug registers are obsolete and not used - /// The 7th debug register is used to enable the breakpoints - /// For more information see: DEBUG REGISTERS chapter in the architecture - /// manual - fn set_debug_config(&mut self, vcpu_fd: &VcpuFd, step: bool) -> Result<()> { - let addrs = &self.hw_breakpoints; - - self.dbg_cfg.arch.debugreg = [0; 8]; - for (k, addr) in addrs.iter().enumerate() { - self.dbg_cfg.arch.debugreg[k] = *addr; - self.dbg_cfg.arch.debugreg[7] |= 1 << (k * 2); - } - - if !addrs.is_empty() { - self.dbg_cfg.control |= KVM_GUESTDBG_USE_HW_BP; - } else { - self.dbg_cfg.control &= !KVM_GUESTDBG_USE_HW_BP; - } - - if step { - self.dbg_cfg.control |= KVM_GUESTDBG_SINGLESTEP; - } else { - self.dbg_cfg.control &= !KVM_GUESTDBG_SINGLESTEP; - } - - log::debug!("Setting bp: {:?} cfg: {:?}", addrs, self.dbg_cfg); - vcpu_fd - .set_guest_debug(&self.dbg_cfg) - .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?; - - self.single_step = step; - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - vcpu_fd: &VcpuFd, - debug_exit: kvm_debug_exit_arch, - entrypoint: u64, - ) -> Result { - let rip = self.get_instruction_pointer(vcpu_fd)?; - let rip = self.translate_gva(vcpu_fd, rip)?; - - // Check if the vCPU stopped because of a hardware breakpoint - let reason = vcpu_stop_reason( - 
self.single_step, - rip, - debug_exit.dr6, - entrypoint, - debug_exit.exception, - &self.hw_breakpoints, - &self.sw_breakpoints, - ); - - if let VcpuStopReason::EntryPointBp = reason { - // In case the hw breakpoint is the entry point, remove it to - // avoid hanging here as gdb does not remove breakpoints it - // has not set. - // Gdb expects the target to be stopped when connected. - self.remove_hw_breakpoint(vcpu_fd, entrypoint)?; - } - - Ok(reason) - } -} - -impl GuestDebug for KvmDebug { - type Vcpu = VcpuFd; - - fn is_hw_breakpoint(&self, addr: &u64) -> bool { - self.hw_breakpoints.contains(addr) - } - fn is_sw_breakpoint(&self, addr: &u64) -> bool { - self.sw_breakpoints.contains_key(addr) - } - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool { - if self.hw_breakpoints.len() >= MAX_NO_OF_HW_BP { - false - } else { - self.hw_breakpoints.push(*addr); - - true - } - } - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]) { - _ = self.sw_breakpoints.insert(addr, data); - } - fn delete_hw_breakpoint(&mut self, addr: &u64) { - self.hw_breakpoints.retain(|&a| a != *addr); - } - fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]> { - self.sw_breakpoints.remove(addr) - } - - fn read_regs(&self, vcpu_fd: &Self::Vcpu) -> Result<(CommonRegisters, CommonFpu)> { - log::debug!("Read registers"); - let regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?; - - let fpu_data = vcpu_fd - .get_fpu() - .map_err(|e| new_error!("Could not read guest FPU registers: {:?}", e))?; - let mut fpu: CommonFpu = CommonFpu::from(&fpu_data); - - // Read MXCSR from XSAVE (MXCSR is at byte offset 24 -> u32 index 6) - // 11.5.10 Mode-Specific XSAVE/XRSTOR State Management - match vcpu_fd.get_xsave() { - Ok(xsave) => { - fpu.mxcsr = xsave.region[6]; - } - Err(e) => { - log::warn!("Failed to read XSAVE for MXCSR: {:?}", e); - } - } - - Ok((CommonRegisters::from(&regs), fpu)) - } - - fn set_single_step(&mut self, 
vcpu_fd: &Self::Vcpu, enable: bool) -> Result<()> { - self.set_debug_config(vcpu_fd, enable) - } - - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> Result { - let tr = vcpu_fd - .translate_gva(gva) - .map_err(|_| HyperlightError::TranslateGuestAddress(gva))?; - - if tr.valid == 0 { - Err(HyperlightError::TranslateGuestAddress(gva)) - } else { - Ok(tr.physical_address) - } - } - - fn write_regs( - &self, - vcpu_fd: &Self::Vcpu, - regs: &CommonRegisters, - fpu: &CommonFpu, - ) -> Result<()> { - log::debug!("Write registers"); - let new_regs = regs.into(); - - vcpu_fd - .set_regs(&new_regs) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e))?; - - // Only xmm and mxcsr is piped though in the given fpu, so only set those - let mut current_fpu: CommonFpu = (&vcpu_fd.get_fpu()?).into(); - current_fpu.mxcsr = fpu.mxcsr; - current_fpu.xmm = fpu.xmm; - vcpu_fd - .set_fpu(&(&current_fpu).into()) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e))?; - - // Read XMM registers from FPU state - // note kvm get_fpu doesn't actually set or read the mxcsr value - // https://elixir.bootlin.com/linux/v6.16/source/arch/x86/kvm/x86.c#L12229 - // Update MXCSR using XSAVE region entry 6 (MXCSR) if available. - let mut xsave = match vcpu_fd.get_xsave() { - Ok(xsave) => xsave, - Err(e) => { - return Err(new_error!("Could not write guest registers: {:?}", e)); - } - }; - - xsave.region[6] = fpu.mxcsr; - unsafe { - vcpu_fd - .set_xsave(&xsave) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e))? - }; - - Ok(()) - } -} diff --git a/src/hyperlight_host/src/hypervisor/gdb/mod.rs b/src/hyperlight_host/src/hypervisor/gdb/mod.rs index 66467c931..cb866d511 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/mod.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/mod.rs @@ -16,12 +16,6 @@ limitations under the License. 
pub(crate) mod arch; mod event_loop; -#[cfg(target_os = "windows")] -mod hyperv_debug; -#[cfg(kvm)] -mod kvm_debug; -#[cfg(mshv3)] -mod mshv_debug; mod x86_64_target; use std::io::{self, ErrorKind}; @@ -29,24 +23,17 @@ use std::net::TcpListener; use std::sync::{Arc, Mutex}; use std::{slice, thread}; -pub(crate) use arch::{SW_BP, SW_BP_SIZE}; use crossbeam_channel::{Receiver, Sender, TryRecvError}; use event_loop::event_loop_thread; use gdbstub::conn::ConnectionExt; use gdbstub::stub::GdbStub; use gdbstub::target::TargetError; -use hyperlight_common::mem::PAGE_SIZE; -#[cfg(target_os = "windows")] -pub(crate) use hyperv_debug::HypervDebug; -#[cfg(kvm)] -pub(crate) use kvm_debug::KvmDebug; -#[cfg(mshv3)] -pub(crate) use mshv_debug::MshvDebug; use thiserror::Error; use x86_64_target::HyperlightSandboxTarget; -use super::InterruptHandle; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; +use super::regs::CommonRegisters; +use super::{Hypervisor, InterruptHandle}; +use crate::hypervisor::regs::CommonFpu; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::MemoryRegion; use crate::mem::mgr::SandboxMemoryManager; @@ -107,7 +94,7 @@ impl DebugMemoryAccess { /// This address is shall be translated before calling this function /// # Returns /// * `Result<(), HyperlightError>` - Ok if successful, Err otherwise - fn read(&self, data: &mut [u8], gpa: u64) -> crate::Result<()> { + pub(crate) fn read(&self, data: &mut [u8], gpa: u64) -> crate::Result<()> { let read_len = data.len(); let mem_offset = (gpa as usize) @@ -172,7 +159,7 @@ impl DebugMemoryAccess { /// This address is shall be translated before calling this function /// # Returns /// * `Result<(), HyperlightError>` - Ok if successful, Err otherwise - fn write(&self, data: &[u8], gpa: u64) -> crate::Result<()> { + pub(crate) fn write(&self, data: &[u8], gpa: u64) -> crate::Result<()> { let write_len = data.len(); let mem_offset = (gpa as usize) @@ -287,165 +274,24 @@ pub(crate) enum 
DebugResponse { WriteRegisters, } -/// This trait is used to define common debugging functionality for Hypervisors -pub(super) trait GuestDebug { - /// Type that wraps the vCPU functionality - type Vcpu; - - /// Returns true whether the provided address is a hardware breakpoint - fn is_hw_breakpoint(&self, addr: &u64) -> bool; - /// Returns true whether the provided address is a software breakpoint - fn is_sw_breakpoint(&self, addr: &u64) -> bool; - /// Stores the address of the hw breakpoint - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool; - /// Stores the data that the sw breakpoint op code replaces - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]); - /// Deletes the address of the hw breakpoint from storage - fn delete_hw_breakpoint(&mut self, addr: &u64); - /// Retrieves the saved data that the sw breakpoint op code replaces - fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]>; - - /// Read registers - fn read_regs(&self, vcpu_fd: &Self::Vcpu) -> crate::Result<(CommonRegisters, CommonFpu)>; - /// Enables or disables stepping and sets the vCPU debug configuration - fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> crate::Result<()>; - /// Translates the guest address to physical address - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> crate::Result; - /// Write registers - fn write_regs( - &self, - vcpu_fd: &Self::Vcpu, - regs: &CommonRegisters, - fpu: &CommonFpu, - ) -> crate::Result<()>; - - /// Adds hardware breakpoint - fn add_hw_breakpoint(&mut self, vcpu_fd: &Self::Vcpu, addr: u64) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - if self.is_hw_breakpoint(&addr) { - return Ok(()); - } - - self.save_hw_breakpoint(&addr) - .then(|| self.set_single_step(vcpu_fd, false)) - .ok_or_else(|| new_error!("Failed to save hw breakpoint"))? 
- } - /// Overwrites the guest memory with the SW Breakpoint op code that instructs - /// the vCPU to stop when is executed and stores the overwritten data to be - /// able to restore it - fn add_sw_breakpoint( - &mut self, - vcpu_fd: &Self::Vcpu, - addr: u64, - mem_access: &DebugMemoryAccess, - ) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - if self.is_sw_breakpoint(&addr) { - return Ok(()); - } - - // Write breakpoint OP code to write to guest memory - let mut save_data = [0; SW_BP_SIZE]; - self.read_addrs(vcpu_fd, addr, &mut save_data[..], mem_access)?; - self.write_addrs(vcpu_fd, addr, &SW_BP, mem_access)?; - - // Save guest memory to restore when breakpoint is removed - self.save_sw_breakpoint_data(addr, save_data); - - Ok(()) - } - /// Copies the data from the guest memory address to the provided slice - /// The address is checked to be a valid guest address - fn read_addrs( - &mut self, - vcpu_fd: &Self::Vcpu, - mut gva: u64, - mut data: &mut [u8], - mem_access: &DebugMemoryAccess, - ) -> crate::Result<()> { - let data_len = data.len(); - log::debug!("Read addr: {:X} len: {:X}", gva, data_len); - - while !data.is_empty() { - let gpa = self.translate_gva(vcpu_fd, gva)?; - - let read_len = std::cmp::min( - data.len(), - (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), - ); - - mem_access.read(&mut data[..read_len], gpa)?; - - data = &mut data[read_len..]; - gva += read_len as u64; - } - - Ok(()) - } - /// Removes hardware breakpoint - fn remove_hw_breakpoint(&mut self, vcpu_fd: &Self::Vcpu, addr: u64) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - self.is_hw_breakpoint(&addr) - .then(|| { - self.delete_hw_breakpoint(&addr); - self.set_single_step(vcpu_fd, false) - }) - .ok_or_else(|| new_error!("The address: {:?} is not a hw breakpoint", addr))? 
- } - /// Restores the overwritten data to the guest memory - fn remove_sw_breakpoint( - &mut self, - vcpu_fd: &Self::Vcpu, - addr: u64, - mem_access: &DebugMemoryAccess, - ) -> crate::Result<()> { - let addr = self.translate_gva(vcpu_fd, addr)?; - - if self.is_sw_breakpoint(&addr) { - let save_data = self - .delete_sw_breakpoint_data(&addr) - .ok_or_else(|| new_error!("Expected to contain the sw breakpoint address"))?; - - // Restore saved data to the guest's memory - self.write_addrs(vcpu_fd, addr, &save_data, mem_access)?; +/// Trait for VMs that support debugging capabilities. +/// This extends the base Hypervisor trait with GDB-specific functionality. +pub(crate) trait DebuggableVm: Hypervisor { + /// Translates a guest virtual address to a guest physical address + fn translate_gva(&self, gva: u64) -> crate::Result; - Ok(()) - } else { - Err(new_error!("The address: {:?} is not a sw breakpoint", addr)) - } - } - /// Copies the data from the provided slice to the guest memory address - /// The address is checked to be a valid guest address - fn write_addrs( - &mut self, - vcpu_fd: &Self::Vcpu, - mut gva: u64, - mut data: &[u8], - mem_access: &DebugMemoryAccess, - ) -> crate::Result<()> { - let data_len = data.len(); - log::debug!("Write addr: {:X} len: {:X}", gva, data_len); - - while !data.is_empty() { - let gpa = self.translate_gva(vcpu_fd, gva)?; - - let write_len = std::cmp::min( - data.len(), - (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), - ); + /// Enable/disable debugging + fn set_debug(&mut self, enable: bool) -> crate::Result<()>; - // Use the memory access to write to guest memory - mem_access.write(&data[..write_len], gpa)?; + /// Enable/disable single stepping + fn set_single_step(&mut self, enable: bool) -> crate::Result<()>; - data = &data[write_len..]; - gva += write_len as u64; - } + /// Add a hardware breakpoint at the given address. + /// Must be idempotent. 
+ fn add_hw_breakpoint(&mut self, addr: u64) -> crate::Result<()>; - Ok(()) - } + /// Remove a hardware breakpoint at the given address + fn remove_hw_breakpoint(&mut self, addr: u64) -> crate::Result<()>; } /// Debug communication channel that is used for sending a request type and diff --git a/src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs b/src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs deleted file mode 100644 index f4d30d1d9..000000000 --- a/src/hyperlight_host/src/hypervisor/gdb/mshv_debug.rs +++ /dev/null @@ -1,237 +0,0 @@ -/* -Copyright 2025 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -use std::collections::HashMap; - -use mshv_bindings::{ - DebugRegisters, HV_TRANSLATE_GVA_VALIDATE_READ, HV_TRANSLATE_GVA_VALIDATE_WRITE, -}; -use mshv_ioctls::VcpuFd; - -use super::arch::{MAX_NO_OF_HW_BP, SW_BP_SIZE, vcpu_stop_reason}; -use super::{GuestDebug, VcpuStopReason}; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; -use crate::{HyperlightError, Result, new_error}; - -#[derive(Debug, Default)] -pub(crate) struct MshvDebug { - /// vCPU stepping state - single_step: bool, - - /// Array of addresses for HW breakpoints - hw_breakpoints: Vec, - /// Saves the bytes modified to enable SW breakpoints - sw_breakpoints: HashMap, - - /// Debug registers - dbg_cfg: DebugRegisters, -} - -impl MshvDebug { - pub(crate) fn new() -> Self { - Self { - single_step: false, - hw_breakpoints: vec![], - sw_breakpoints: HashMap::new(), - dbg_cfg: DebugRegisters::default(), - } - } - - /// Returns the instruction pointer from the stopped vCPU - fn get_instruction_pointer(&self, vcpu_fd: &VcpuFd) -> Result { - let regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; - - Ok(regs.rip) - } - - /// This method sets the vCPU debug register fields to enable breakpoints at - /// specific addresses - /// - /// The first 4 debug registers are used to set the addresses - /// The 4th and 5th debug registers are obsolete and not used - /// The 7th debug register is used to enable the breakpoints - /// For more information see: DEBUG REGISTERS chapter in the architecture - /// manual - fn set_debug_config(&mut self, vcpu_fd: &VcpuFd, step: bool) -> Result<()> { - let addrs = &self.hw_breakpoints; - - let mut dbg_cfg = DebugRegisters::default(); - for (k, addr) in addrs.iter().enumerate() { - match k { - 0 => { - dbg_cfg.dr0 = *addr; - } - 1 => { - dbg_cfg.dr1 = *addr; - } - 2 => { - dbg_cfg.dr2 = *addr; - } - 3 => { - dbg_cfg.dr3 = *addr; - } - _ => { - Err(new_error!("Tried to set more than 4 HW breakpoints"))?; - } - } 
- dbg_cfg.dr7 |= 1 << (k * 2); - } - - self.dbg_cfg = dbg_cfg; - vcpu_fd - .set_debug_regs(&self.dbg_cfg) - .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?; - - self.single_step = step; - - let mut regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not get registers: {:?}", e))?; - - // Set TF Flag to enable Traps - if self.single_step { - regs.rflags |= 1 << 8; - } else { - regs.rflags &= !(1 << 8); - } - - vcpu_fd - .set_regs(&regs) - .map_err(|e| new_error!("Could not set registers: {:?}", e))?; - - Ok(()) - } - - /// Returns the vCPU stop reason - pub(crate) fn get_stop_reason( - &mut self, - vcpu_fd: &VcpuFd, - exception: u16, - entrypoint: u64, - ) -> Result { - let regs = vcpu_fd - .get_debug_regs() - .map_err(|e| new_error!("Cannot retrieve debug registers from vCPU: {}", e))?; - - // DR6 register contains debug state related information - let debug_status = regs.dr6; - - let rip = self.get_instruction_pointer(vcpu_fd)?; - let rip = self.translate_gva(vcpu_fd, rip)?; - - let reason = vcpu_stop_reason( - self.single_step, - rip, - debug_status, - entrypoint, - exception as u32, - &self.hw_breakpoints, - &self.sw_breakpoints, - ); - - if let VcpuStopReason::EntryPointBp = reason { - // In case the hw breakpoint is the entry point, remove it to - // avoid hanging here as gdb does not remove breakpoints it - // has not set. - // Gdb expects the target to be stopped when connected. 
- self.remove_hw_breakpoint(vcpu_fd, entrypoint)?; - } - - Ok(reason) - } -} - -impl GuestDebug for MshvDebug { - type Vcpu = VcpuFd; - - fn is_hw_breakpoint(&self, addr: &u64) -> bool { - self.hw_breakpoints.contains(addr) - } - fn is_sw_breakpoint(&self, addr: &u64) -> bool { - self.sw_breakpoints.contains_key(addr) - } - fn save_hw_breakpoint(&mut self, addr: &u64) -> bool { - if self.hw_breakpoints.len() >= MAX_NO_OF_HW_BP { - false - } else { - self.hw_breakpoints.push(*addr); - - true - } - } - fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]) { - _ = self.sw_breakpoints.insert(addr, data); - } - fn delete_hw_breakpoint(&mut self, addr: &u64) { - self.hw_breakpoints.retain(|&a| a != *addr); - } - fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]> { - self.sw_breakpoints.remove(addr) - } - - fn read_regs(&self, vcpu_fd: &Self::Vcpu) -> Result<(CommonRegisters, CommonFpu)> { - log::debug!("Read registers"); - - let regs = vcpu_fd - .get_regs() - .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?; - let regs = CommonRegisters::from(&regs); - - let fpu_data = vcpu_fd - .get_fpu() - .map_err(|e| new_error!("Could not read guest FPU registers: {:?}", e))?; - let fpu = CommonFpu::from(&fpu_data); - - Ok((regs, fpu)) - } - - fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> Result<()> { - self.set_debug_config(vcpu_fd, enable) - } - - fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> Result { - let flags = (HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE) as u64; - let (addr, _) = vcpu_fd - .translate_gva(gva, flags) - .map_err(|_| HyperlightError::TranslateGuestAddress(gva))?; - - Ok(addr) - } - - fn write_regs( - &self, - vcpu_fd: &Self::Vcpu, - regs: &CommonRegisters, - fpu: &CommonFpu, - ) -> Result<()> { - log::debug!("Write registers"); - - vcpu_fd - .set_regs(&regs.into()) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e))?; - - // Only xmm and mxcsr 
is piped though in the given fpu, so only set those - let mut current_fpu: CommonFpu = (&vcpu_fd.get_fpu()?).into(); - current_fpu.mxcsr = fpu.mxcsr; - current_fpu.xmm = fpu.xmm; - - vcpu_fd - .set_fpu(&(&current_fpu).into()) - .map_err(|e| new_error!("Could not write guest registers: {:?}", e)) - } -} diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index dde4c2024..a6c42ccf0 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -1,5 +1,5 @@ /* -Copyright 2025 The Hyperlight Authors. +Copyright 2024 The Hyperlight Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,54 +14,141 @@ See the License for the specific language governing permissions and limitations under the License. */ -use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; -use crate::{HyperlightError, Result, log_then_return, new_error}; +#[cfg(gdb)] +use std::collections::HashMap; +#[cfg(crashdump)] +use std::path::Path; +#[cfg(any(kvm, mshv3))] +use std::sync::atomic::AtomicU64; +use std::sync::atomic::{AtomicBool, AtomicU8}; +use std::sync::{Arc, Mutex}; +use log::LevelFilter; +use tracing::{Span, instrument}; +#[cfg(feature = "trace_guest")] +use tracing_opentelemetry::OpenTelemetrySpanExt; + +#[cfg(target_os = "windows")] +use super::WindowsInterruptHandle; +#[cfg(gdb)] +use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, DebuggableVm, VcpuStopReason, arch}; +use super::regs::{CommonFpu, CommonRegisters}; +use crate::HyperlightError::{ExecutionCanceledByHost, NoHypervisorFound}; +#[cfg(not(gdb))] +use crate::hypervisor::Hypervisor; #[cfg(any(kvm, mshv3))] use crate::hypervisor::LinuxInterruptHandle; -use crate::hypervisor::{ - HyperlightExit, Hypervisor, InterruptHandle, InterruptHandleImpl, MemoryAccess, - get_memory_access_violation, -}; 
-use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; +#[cfg(crashdump)] +use crate::hypervisor::crashdump; +#[cfg(mshv3)] +use crate::hypervisor::hyperv_linux::MshvVm; +#[cfg(target_os = "windows")] +use crate::hypervisor::hyperv_windows::WhpVm; +#[cfg(kvm)] +use crate::hypervisor::kvm::KvmVm; +use crate::hypervisor::regs::CommonSpecialRegisters; +#[cfg(target_os = "windows")] +use crate::hypervisor::wrappers::HandleWrapper; +use crate::hypervisor::{HyperlightExit, InterruptHandle, InterruptHandleImpl, get_max_log_level}; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; +use crate::mem::ptr::{GuestPtr, RawPtr}; use crate::mem::shared_mem::HostSharedMemory; +use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION}; use crate::sandbox::SandboxConfiguration; use crate::sandbox::host_funcs::FunctionRegistry; +use crate::sandbox::hypervisor::{HypervisorType, get_available_hypervisor}; use crate::sandbox::outb::handle_outb; +#[cfg(feature = "mem_profile")] +use crate::sandbox::trace::MemTraceInfo; +#[cfg(crashdump)] +use crate::sandbox::uninitialized::SandboxRuntimeConfig; +use crate::{HyperlightError, Result, log_then_return, new_error}; -use log::LevelFilter; -#[cfg(any(kvm, mshv3))] -#[cfg(not(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" -)))] -use std::sync::atomic::AtomicU64; -#[cfg(any(kvm, mshv3))] -use std::sync::atomic::{AtomicBool, AtomicU8}; -use std::sync::{Arc, Mutex}; -use tracing::{Span, instrument}; - -#[derive(Debug)] pub(crate) struct HyperlightVm { - pub(crate) vm: Box, + #[cfg(gdb)] + vm: Box, + #[cfg(not(gdb))] + vm: Box, + page_size: usize, + entrypoint: u64, + orig_rsp: GuestPtr, interrupt_handle: Arc, sandbox_regions: Vec, // Initially mapped regions when sandbox is created mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot 
number, region) next_slot: u32, // Monotonically increasing slot number freed_slots: Vec, // Reusable slots from unmapped regions + + #[cfg(gdb)] + gdb_conn: Option>, + #[cfg(gdb)] + sw_breakpoints: HashMap, // addr -> original instruction + #[cfg(feature = "mem_profile")] + trace_info: MemTraceInfo, + #[cfg(crashdump)] + rt_cfg: SandboxRuntimeConfig, } impl HyperlightVm { + /// Create a new HyperlightVm instance (will not run vm until calling `initialise`) + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + #[allow(clippy::too_many_arguments)] pub(crate) fn new( - mut vm: Box, mem_regions: Vec, - config: &SandboxConfiguration, + _pml4_addr: u64, + entrypoint: u64, + rsp: u64, + #[cfg_attr(not(any(kvm, mshv3)), allow(unused_variables))] config: &SandboxConfiguration, + #[cfg(target_os = "windows")] handle: HandleWrapper, + #[cfg(target_os = "windows")] raw_size: usize, + #[cfg(gdb)] gdb_conn: Option>, + #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, + #[cfg(feature = "mem_profile")] trace_info: MemTraceInfo, ) -> Result { + #[cfg(gdb)] + type VmType = Box; + #[cfg(not(gdb))] + type VmType = Box; + + #[allow(unused_mut)] // needs to be mutable when gdb is enabled + let mut vm: VmType = match get_available_hypervisor() { + #[cfg(kvm)] + Some(HypervisorType::Kvm) => Box::new(KvmVm::new()?), + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => Box::new(MshvVm::new()?), + #[cfg(target_os = "windows")] + Some(HypervisorType::Whp) => Box::new(WhpVm::new(handle, raw_size)?), + None => return Err(NoHypervisorFound()), + }; + + for (i, region) in mem_regions.iter().enumerate() { + // Safety: slots are unique and region points to valid memory since we created the regions + unsafe { vm.map_memory((i as u32, region))? 
}; + } + + // Mark initial setup as complete for Windows - subsequent map_memory calls will fail + #[cfg(target_os = "windows")] + vm.complete_initial_memory_setup(); + + #[cfg(feature = "init-paging")] + vm.set_sregs(&CommonSpecialRegisters::standard_64bit_defaults(_pml4_addr))?; + #[cfg(not(feature = "init-paging"))] + vm.set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; + + #[cfg(gdb)] + let gdb_conn = if let Some(gdb_conn) = gdb_conn { + // Add breakpoint to the entry point address + vm.set_debug(true)?; + vm.add_hw_breakpoint(entrypoint)?; + + Some(gdb_conn) + } else { + None + }; + + let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?; + #[cfg(any(kvm, mshv3))] let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { state: AtomicU8::new(0), @@ -91,21 +178,41 @@ impl HyperlightVm { dropped: AtomicBool::new(false), }); - for (i, region) in mem_regions.iter().enumerate() { - // Safety: slots are unique and region points to valid memory since we created the regions - unsafe { vm.map_memory((i as u32, region))? }; - } - - Ok(Self { + #[allow(unused_mut)] // needs to be mutable when gdb is enabled + let mut ret = Self { vm, + entrypoint, + orig_rsp: rsp_gp, interrupt_handle, + page_size: 0, // Will be set in `initialise` + next_slot: mem_regions.len() as u32, sandbox_regions: mem_regions, mmap_regions: Vec::new(), freed_slots: Vec::new(), - }) + + #[cfg(gdb)] + gdb_conn, + #[cfg(gdb)] + sw_breakpoints: HashMap::new(), + #[cfg(feature = "mem_profile")] + trace_info, + #[cfg(crashdump)] + rt_cfg, + }; + + // Send the interrupt handle to the GDB thread if debugging is enabled + // This is used to allow the GDB thread to stop the vCPU + #[cfg(gdb)] + if ret.gdb_conn.is_some() { + ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?; + } + + Ok(ret) } + /// Initialise the HyperlightVm (will run vm). 
+ #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] #[allow(clippy::too_many_arguments)] pub(crate) fn initialise( &mut self, @@ -114,20 +221,69 @@ impl HyperlightVm { page_size: u32, mem_mgr: &mut SandboxMemoryManager, host_funcs: &Arc>, - guest_max_log_level: Option, + max_guest_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: Arc>>, ) -> Result<()> { - self.vm.initialise( - peb_addr, - seed, - page_size, + self.page_size = page_size as usize; + + let max_guest_log_level: u64 = match max_guest_log_level { + Some(level) => level as u64, + None => get_max_log_level().into(), + }; + + let regs = CommonRegisters { + rip: self.entrypoint, + rsp: self.orig_rsp.absolute()?, + + // function args + rdi: peb_addr.into(), + rsi: seed, + rdx: page_size.into(), + rcx: max_guest_log_level, + rflags: 1 << 1, + + ..Default::default() + }; + self.vm.set_regs(&regs)?; + + self.run( mem_mgr, host_funcs, - guest_max_log_level, #[cfg(gdb)] dbg_mem_access_fn, )?; - self.run(mem_mgr, host_funcs) + + Ok(()) + } + + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn dispatch_call_from_host( + &mut self, + dispatch_func_addr: RawPtr, + mem_mgr: &mut SandboxMemoryManager, + host_funcs: &Arc>, + #[cfg(gdb)] dbg_mem_access_fn: Arc>>, + ) -> Result<()> { + // set RIP and RSP, reset others + let regs = CommonRegisters { + rip: dispatch_func_addr.into(), + rsp: self.orig_rsp.absolute()?, + rflags: 1 << 1, + ..Default::default() + }; + self.vm.set_regs(&regs)?; + + // reset fpu + self.vm.set_fpu(&CommonFpu::default())?; + + self.run( + mem_mgr, + host_funcs, + #[cfg(gdb)] + dbg_mem_access_fn, + )?; + + Ok(()) } // Safety: The caller must ensure that the memory region is valid and points to valid memory, @@ -163,31 +319,6 @@ impl HyperlightVm { self.mmap_regions.iter().map(|(_, region)| region) } - pub(crate) fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - mem_mgr: &mut SandboxMemoryManager, - host_funcs:
&Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()> { - self.vm.dispatch_call_from_host( - dispatch_func_addr, - mem_mgr, - host_funcs, - #[cfg(gdb)] - dbg_mem_access_fn, - )?; - self.run(mem_mgr, host_funcs) - } - - pub(crate) fn interrupt_handle(&self) -> Arc { - self.vm.interrupt_handle() - } - - pub(crate) fn clear_cancel(&self) { - self.vm.clear_cancel() - } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn handle_io( &mut self, @@ -390,7 +521,7 @@ impl HyperlightVm { } metrics::counter!(METRIC_GUEST_CANCELLATION).increment(1); - break Err(HyperlightError::ExecutionCanceledByHost()); + break Err(ExecutionCanceledByHost()); } Ok(HyperlightExit::Unknown(reason)) => { break Err(new_error!("Unexpected VM Exit: {:?}", reason)); @@ -425,4 +556,511 @@ impl HyperlightVm { } } } + + pub(crate) fn interrupt_handle(&self) -> Arc { + self.interrupt_handle.clone() + } + + pub(crate) fn clear_cancel(&self) { + self.interrupt_handle.clear_cancel(); + } + + #[cfg(gdb)] + fn handle_debug( + &mut self, + dbg_mem_access_fn: Arc>>, + stop_reason: VcpuStopReason, + ) -> Result<()> { + use crate::hypervisor::gdb::DebugMemoryAccess; + + if self.gdb_conn.is_none() { + return Err(new_error!("Debugging is not enabled")); + } + + let mem_access = DebugMemoryAccess { + dbg_mem_access_fn, + guest_mmap_regions: self.mmap_regions.iter().map(|(_, r)| r.clone()).collect(), + }; + + match stop_reason { + // If the vCPU stopped because of a crash, we need to handle it differently + // We do not want to allow resuming execution or placing breakpoints + // because the guest has crashed. 
+ // We only allow reading registers and memory + VcpuStopReason::Crash => { + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) + .map_err(|e| { + new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) + })?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + // Wait for a message from gdb + let req = self.recv_dbg_msg()?; + + // Flag to store if we should deny continue or step requests + let mut deny_continue = false; + // Flag to store if we should detach from the gdb session + let mut detach = false; + + let response = match req { + // Allow the detach request to disable debugging by continuing resuming + // hypervisor crash error reporting + DebugMsg::DisableDebug => { + detach = true; + DebugResponse::DisableDebug + } + // Do not allow continue or step requests + DebugMsg::Continue | DebugMsg::Step => { + deny_continue = true; + DebugResponse::NotAllowed + } + // Do not allow adding/removing breakpoints and writing to memory or registers + DebugMsg::AddHwBreakpoint(_) + | DebugMsg::AddSwBreakpoint(_) + | DebugMsg::RemoveHwBreakpoint(_) + | DebugMsg::RemoveSwBreakpoint(_) + | DebugMsg::WriteAddr(_, _) + | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, + + // For all other requests, we will process them normally + _ => { + let result = self.process_dbg_request(req, &mem_access); + match result { + Ok(response) => response, + Err(HyperlightError::TranslateGuestAddress(_)) => { + // Treat non fatal errors separately so the guest doesn't fail + DebugResponse::ErrorOccurred + } + Err(e) => { + log::error!("Error processing debug request: {:?}", e); + return Err(e); + } + } + } + }; + + // Send the response to the request back to gdb + self.send_dbg_msg(response) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + + // If we are denying continue or step requests, the debugger assumes the + // execution started so we need to report a stop reason as a crash and let + // it request to read 
registers/memory to figure out what happened + if deny_continue { + self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash)) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + } + + // If we are detaching, we will break the loop and the Hypervisor will continue + // to handle the Crash reason + if detach { + break; + } + } + } + // If the vCPU stopped because of any other reason except a crash, we can handle it + // normally + _ => { + // Send the stop reason to the gdb thread + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) + .map_err(|e| { + new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) + })?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + // Wait for a message from gdb + let req = self.recv_dbg_msg()?; + + let result = self.process_dbg_request(req, &mem_access); + + let response = match result { + Ok(response) => response, + // Treat non fatal errors separately so the guest doesn't fail + Err(HyperlightError::TranslateGuestAddress(_)) => { + DebugResponse::ErrorOccurred + } + Err(e) => { + return Err(e); + } + }; + + let cont = matches!( + response, + DebugResponse::Continue | DebugResponse::Step | DebugResponse::DisableDebug + ); + + self.send_dbg_msg(response) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + + // Check if we should continue execution + // We continue if the response is one of the following: Step, Continue, or DisableDebug + if cont { + break; + } + } + } + } + + Ok(()) + } + + // -------------------------- + // --- CRASHDUMP BELOW ------ + // -------------------------- + + #[cfg(crashdump)] + pub(crate) fn crashdump_context(&self) -> Result> { + if self.rt_cfg.guest_core_dump { + let mut regs = [0; 27]; + + let vcpu_regs = self.vm.regs()?; + let sregs = self.vm.sregs()?; + let xsave = self.vm.xsave()?; + + // Set up the registers for the crash dump + regs[0] = vcpu_regs.r15; // r15 + regs[1] = vcpu_regs.r14; // r14 + regs[2] = 
vcpu_regs.r13; // r13 + regs[3] = vcpu_regs.r12; // r12 + regs[4] = vcpu_regs.rbp; // rbp + regs[5] = vcpu_regs.rbx; // rbx + regs[6] = vcpu_regs.r11; // r11 + regs[7] = vcpu_regs.r10; // r10 + regs[8] = vcpu_regs.r9; // r9 + regs[9] = vcpu_regs.r8; // r8 + regs[10] = vcpu_regs.rax; // rax + regs[11] = vcpu_regs.rcx; // rcx + regs[12] = vcpu_regs.rdx; // rdx + regs[13] = vcpu_regs.rsi; // rsi + regs[14] = vcpu_regs.rdi; // rdi + regs[15] = 0; // orig rax + regs[16] = vcpu_regs.rip; // rip + regs[17] = sregs.cs.selector as u64; // cs + regs[18] = vcpu_regs.rflags; // eflags + regs[19] = vcpu_regs.rsp; // rsp + regs[20] = sregs.ss.selector as u64; // ss + regs[21] = sregs.fs.base; // fs_base + regs[22] = sregs.gs.base; // gs_base + regs[23] = sregs.ds.selector as u64; // ds + regs[24] = sregs.es.selector as u64; // es + regs[25] = sregs.fs.selector as u64; // fs + regs[26] = sregs.gs.selector as u64; // gs + + // Get the filename from the binary path + let filename = self.rt_cfg.binary_path.clone().and_then(|path| { + Path::new(&path) + .file_name() + .and_then(|name| name.to_os_string().into_string().ok()) + }); + + // Include both initial sandbox regions and dynamically mapped regions + let mut regions: Vec = self.sandbox_regions.clone(); + regions.extend(self.mmap_regions.iter().map(|(_, r)| r).cloned()); + Ok(Some(crashdump::CrashDumpContext::new( + regions, + regs, + xsave.to_vec(), + self.entrypoint, + self.rt_cfg.binary_path.clone(), + filename, + ))) + } else { + Ok(None) + } + } +} + +impl Drop for HyperlightVm { + fn drop(&mut self) { + self.interrupt_handle.set_dropped(); + } +} + +/// The vCPU tried to access the given addr +enum MemoryAccess { + /// The accessed region has the given flags + AccessViolation(MemoryRegionFlags), + /// The accessed region is a stack guard page + StackGuardPageViolation, +} + +/// Determines if a known memory access violation occurred at the given address with the given action type. 
+/// Returns Some(reason) if violation reason could be determined, or None if violation occurred but in unmapped region. +fn get_memory_access_violation<'a>( + gpa: usize, + tried: MemoryRegionFlags, + mut mem_regions: impl Iterator, +) -> Option { + // find the region containing the given gpa + let region = mem_regions.find(|region| region.guest_region.contains(&gpa)); + + if let Some(region) = region { + if region.region_type == MemoryRegionType::GuardPage { + return Some(MemoryAccess::StackGuardPageViolation); + } else if !region.flags.contains(tried) { + return Some(MemoryAccess::AccessViolation(region.flags)); + } + } + None +} + +#[cfg(gdb)] +mod debug { + use hyperlight_common::mem::PAGE_SIZE; + + use super::HyperlightVm; + use crate::hypervisor::gdb::arch::{SW_BP, SW_BP_SIZE}; + use crate::hypervisor::gdb::{DebugMemoryAccess, DebugMsg, DebugResponse}; + use crate::{Result, new_error}; + + impl HyperlightVm { + pub(crate) fn process_dbg_request( + &mut self, + req: DebugMsg, + mem_access: &DebugMemoryAccess, + ) -> Result { + if self.gdb_conn.is_some() { + match req { + DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( + self.vm + .add_hw_breakpoint(addr) + .map_err(|e| { + log::error!("Failed to add hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( + self.add_sw_breakpoint(addr, mem_access) + .map_err(|e| { + log::error!("Failed to add sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Continue => { + self.vm.set_single_step(false).map_err(|e| { + log::error!("Failed to continue execution: {:?}", e); + + e + })?; + + Ok(DebugResponse::Continue) + } + DebugMsg::DisableDebug => { + self.vm.set_debug(false).map_err(|e| { + log::error!("Failed to disable debugging: {:?}", e); + e + })?; + + Ok(DebugResponse::DisableDebug) + } + DebugMsg::GetCodeSectionOffset => { + let offset = mem_access + .dbg_mem_access_fn + .try_lock() + .map_err(|e| { + 
new_error!("Error locking at {}:{}: {}", file!(), line!(), e) + })? + .layout + .get_guest_code_address(); + + Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) + } + DebugMsg::ReadAddr(addr, len) => { + let mut data = vec![0u8; len]; + + self.read_addrs(addr, &mut data, mem_access).map_err(|e| { + log::error!("Failed to read from address: {:?}", e); + + e + })?; + + Ok(DebugResponse::ReadAddr(data)) + } + DebugMsg::ReadRegisters => { + let regs = self.vm.regs()?; + let fpu = self.vm.fpu()?; + Ok(DebugResponse::ReadRegisters(Box::new((regs, fpu)))) + } + DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( + self.vm + .remove_hw_breakpoint(addr) + .map_err(|e| { + log::error!("Failed to remove hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( + self.remove_sw_breakpoint(addr, mem_access) + .map_err(|e| { + log::error!("Failed to remove sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Step => { + self.vm.set_single_step(true).map_err(|e| { + log::error!("Failed to enable step instruction: {:?}", e); + + e + })?; + + Ok(DebugResponse::Step) + } + DebugMsg::WriteAddr(addr, data) => { + self.write_addrs(addr, &data, mem_access).map_err(|e| { + log::error!("Failed to write to address: {:?}", e); + + e + })?; + + Ok(DebugResponse::WriteAddr) + } + DebugMsg::WriteRegisters(boxed_regs) => { + let (regs, fpu) = boxed_regs.as_ref(); + self.vm.set_regs(regs)?; + self.vm.set_fpu(fpu)?; + + Ok(DebugResponse::WriteRegisters) + } + } + } else { + Err(new_error!("Debugging is not enabled")) + } + } + + pub(crate) fn recv_dbg_msg(&mut self) -> Result { + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or_else(|| new_error!("Debug is not enabled"))?; + + gdb_conn.recv().map_err(|e| { + new_error!( + "Got an error while waiting to receive a message from the gdb thread: {:?}", + e + ) + }) + } + + pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) 
-> Result<()> { + log::debug!("Sending {:?}", cmd); + + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or_else(|| new_error!("Debug is not enabled"))?; + + gdb_conn.send(cmd).map_err(|e| { + new_error!( + "Got an error while sending a response message to the gdb thread: {:?}", + e + ) + }) + } + + fn read_addrs( + &mut self, + mut gva: u64, + mut data: &mut [u8], + mem_access: &DebugMemoryAccess, + ) -> crate::Result<()> { + let data_len = data.len(); + log::debug!("Read addr: {:X} len: {:X}", gva, data_len); + + while !data.is_empty() { + let gpa = self.vm.translate_gva(gva)?; + + let read_len = std::cmp::min( + data.len(), + (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), + ); + + mem_access.read(&mut data[..read_len], gpa)?; + + data = &mut data[read_len..]; + gva += read_len as u64; + } + + Ok(()) + } + + /// Copies the data from the provided slice to the guest memory address + /// The address is checked to be a valid guest address + fn write_addrs( + &mut self, + mut gva: u64, + mut data: &[u8], + mem_access: &DebugMemoryAccess, + ) -> crate::Result<()> { + let data_len = data.len(); + log::debug!("Write addr: {:X} len: {:X}", gva, data_len); + + while !data.is_empty() { + let gpa = self.vm.translate_gva(gva)?; + + let write_len = std::cmp::min( + data.len(), + (PAGE_SIZE - (gpa & (PAGE_SIZE - 1))).try_into().unwrap(), + ); + + // Use the memory access to write to guest memory + mem_access.write(&data[..write_len], gpa)?; + + data = &data[write_len..]; + gva += write_len as u64; + } + + Ok(()) + } + + // Must be idempotent! 
+ fn add_sw_breakpoint( + &mut self, + addr: u64, + mem_access: &DebugMemoryAccess, + ) -> crate::Result<()> { + let addr = self.vm.translate_gva(addr)?; + + // Check if breakpoint already exists + if self.sw_breakpoints.contains_key(&addr) { + return Ok(()); + } + + // Write breakpoint OP code to write to guest memory + let mut save_data = [0; SW_BP_SIZE]; + self.read_addrs(addr, &mut save_data[..], mem_access)?; + self.write_addrs(addr, &SW_BP, mem_access)?; + + // Save guest memory to restore when breakpoint is removed + self.sw_breakpoints.insert(addr, save_data[0]); + + Ok(()) + } + + fn remove_sw_breakpoint( + &mut self, + addr: u64, + mem_access: &DebugMemoryAccess, + ) -> crate::Result<()> { + let addr = self.vm.translate_gva(addr)?; + + if let Some(saved_data) = self.sw_breakpoints.remove(&addr) { + // Restore saved data to the guest's memory + self.write_addrs(addr, &[saved_data], mem_access)?; + + Ok(()) + } else { + Err(new_error!("The address: {:?} is not a sw breakpoint", addr)) + } + } + } } diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 08e559156..968506816 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -14,240 +14,28 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -use std::fmt::{Debug, Formatter}; -use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64}; -use std::sync::{Arc, Mutex}; +#[cfg(gdb)] +use std::fmt::Debug; +use std::sync::LazyLock; -use log::{LevelFilter, error}; +#[cfg(gdb)] +use mshv_bindings::{DebugRegisters, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT}; use mshv_bindings::{ - FloatingPointUnit, SpecialRegisters, StandardRegisters, hv_message_type, - hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, + hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_partition_synthetic_processor_features, hv_register_assoc, hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region, }; -#[cfg(gdb)] -use mshv_bindings::{ - HV_INTERCEPT_ACCESS_MASK_EXECUTE, hv_intercept_parameters, - hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT, - mshv_install_intercept, -}; use mshv_ioctls::{Mshv, VcpuFd, VmFd}; use tracing::{Span, instrument}; -#[cfg(feature = "trace_guest")] -use tracing_opentelemetry::OpenTelemetrySpanExt; -#[cfg(crashdump)] -use {super::crashdump, std::path::Path}; #[cfg(gdb)] -use super::gdb::{ - DebugCommChannel, DebugMemoryAccess, DebugMsg, DebugResponse, GuestDebug, MshvDebug, - VcpuStopReason, -}; -use super::{Hypervisor, LinuxInterruptHandle}; -#[cfg(gdb)] -use crate::HyperlightError; -use crate::hypervisor::regs::CommonFpu; -use crate::hypervisor::{InterruptHandle, InterruptHandleImpl, HyperlightExit}; +use crate::hypervisor::gdb::DebuggableVm; +use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use crate::hypervisor::{HyperlightExit, Hypervisor}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::{GuestPtr, RawPtr}; 
-use crate::mem::shared_mem::HostSharedMemory; -use crate::sandbox::SandboxConfiguration; -use crate::sandbox::host_funcs::FunctionRegistry; -use crate::sandbox::outb::handle_outb; -#[cfg(feature = "mem_profile")] -use crate::sandbox::trace::MemTraceInfo; -#[cfg(crashdump)] -use crate::sandbox::uninitialized::SandboxRuntimeConfig; -use crate::{Result, log_then_return, new_error}; - -#[cfg(gdb)] -mod debug { - use mshv_bindings::hv_x64_exception_intercept_message; - - use super::{HypervLinuxDriver, *}; - use crate::hypervisor::gdb::{DebugMemoryAccess, DebugMsg, DebugResponse, VcpuStopReason}; - use crate::{Result, new_error}; - - impl HypervLinuxDriver { - /// Resets the debug information to disable debugging - fn disable_debug(&mut self) -> Result<()> { - let mut debug = MshvDebug::default(); - - debug.set_single_step(&self.vcpu_fd, false)?; - - self.debug = Some(debug); - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - ex_info: hv_x64_exception_intercept_message, - ) -> Result { - let debug = self - .debug - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - debug.get_stop_reason(&self.vcpu_fd, ex_info.exception_vector, self.entrypoint) - } - - pub(crate) fn process_dbg_request( - &mut self, - req: DebugMsg, - mem_access: &DebugMemoryAccess, - ) -> Result { - if let Some(debug) = self.debug.as_mut() { - match req { - DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( - debug - .add_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to add hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( - debug - .add_sw_breakpoint(&self.vcpu_fd, addr, mem_access) - .map_err(|e| { - log::error!("Failed to add sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Continue => { - debug.set_single_step(&self.vcpu_fd, false).map_err(|e| { - log::error!("Failed to continue 
execution: {:?}", e); - - e - })?; - - Ok(DebugResponse::Continue) - } - DebugMsg::DisableDebug => { - self.disable_debug().map_err(|e| { - log::error!("Failed to disable debugging: {:?}", e); - - e - })?; - - Ok(DebugResponse::DisableDebug) - } - DebugMsg::GetCodeSectionOffset => { - let offset = mem_access - .dbg_mem_access_fn - .try_lock() - .map_err(|e| { - new_error!("Error locking at {}:{}: {}", file!(), line!(), e) - })? - .layout - .get_guest_code_address(); - - Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) - } - DebugMsg::ReadAddr(addr, len) => { - let mut data = vec![0u8; len]; - - debug.read_addrs(&self.vcpu_fd, addr, &mut data, mem_access)?; - - Ok(DebugResponse::ReadAddr(data)) - } - DebugMsg::ReadRegisters => debug - .read_regs(&self.vcpu_fd) - .map_err(|e| { - log::error!("Failed to read registers: {:?}", e); - - e - }) - .map(|(regs, fpu)| DebugResponse::ReadRegisters(Box::new((regs, fpu)))), - DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( - debug - .remove_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to remove hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( - debug - .remove_sw_breakpoint(&self.vcpu_fd, addr, mem_access) - .map_err(|e| { - log::error!("Failed to remove sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Step => { - debug.set_single_step(&self.vcpu_fd, true).map_err(|e| { - log::error!("Failed to enable step instruction: {:?}", e); - - e - })?; - - Ok(DebugResponse::Step) - } - DebugMsg::WriteAddr(addr, data) => { - debug.write_addrs(&self.vcpu_fd, addr, &data, mem_access)?; - - Ok(DebugResponse::WriteAddr) - } - DebugMsg::WriteRegisters(boxed_regs) => { - let (regs, fpu) = boxed_regs.as_ref(); - debug - .write_regs(&self.vcpu_fd, regs, fpu) - .map_err(|e| { - log::error!("Failed to write registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::WriteRegisters) - } - 
} - } else { - Err(new_error!("Debugging is not enabled")) - } - } - - pub(crate) fn recv_dbg_msg(&mut self) -> Result { - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn.recv().map_err(|e| { - new_error!( - "Got an error while waiting to receive a - message: {:?}", - e - ) - }) - } - - pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { - log::debug!("Sending {:?}", cmd); - - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn - .send(cmd) - .map_err(|e| new_error!("Got an error while sending a response message {:?}", e)) - } - } -} +use crate::{Result, new_error}; /// Determine whether the HyperV for Linux hypervisor API is present /// and functional. @@ -262,51 +50,24 @@ pub(crate) fn is_hypervisor_present() -> bool { } } -/// A Hypervisor driver for HyperV-on-Linux. This hypervisor is often -/// called the Microsoft Hypervisor (MSHV) -pub(crate) struct HypervLinuxDriver { - _mshv: Mshv, - page_size: usize, +/// A MSHV implementation of a single-vcpu VM +#[derive(Debug)] +pub(crate) struct MshvVm { vm_fd: VmFd, vcpu_fd: VcpuFd, - orig_rsp: GuestPtr, - entrypoint: u64, - interrupt_handle: Arc, - - #[cfg(gdb)] - debug: Option, - #[cfg(gdb)] - gdb_conn: Option>, - #[cfg(crashdump)] - rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] - trace_info: MemTraceInfo, } -impl HypervLinuxDriver { - /// Create a new `HypervLinuxDriver`, complete with all registers - /// set up to execute a Hyperlight binary inside a HyperV-powered - /// sandbox on Linux. - /// - /// While registers are set up, they will not have been applied to - /// the underlying virtual CPU after this function returns. Call the - /// `apply_registers` method to do that, or more likely call - /// `initialise` to do it for you. - #[allow(clippy::too_many_arguments)] - // TODO: refactor this function to take fewer arguments. 
Add trace_info to rt_cfg +static MSHV: LazyLock> = + LazyLock::new(|| Mshv::new().map_err(|e| new_error!("Failed to open /dev/mshv: {}", e))); + +impl MshvVm { + /// Create a new instance of a MshvVm #[instrument(skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn new( - entrypoint_ptr: GuestPtr, - rsp_ptr: GuestPtr, - pml4_ptr: GuestPtr, - config: &SandboxConfiguration, - #[cfg(gdb)] gdb_conn: Option>, - #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] trace_info: MemTraceInfo, - ) -> Result { - let mshv = Mshv::new()?; + pub(crate) fn new() -> Result { + let mshv = MSHV + .as_ref() + .map_err(|e| new_error!("Failed to create MSHV instance: {}", e))?; let pr = Default::default(); - let vm_fd = { // It's important to avoid create_vm() and explicitly use // create_vm_with_args() with an empty arguments structure @@ -324,155 +85,43 @@ impl HypervLinuxDriver { let vcpu_fd = vm_fd.create_vcpu(0)?; - #[cfg(gdb)] - let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn { - let mut debug = MshvDebug::new(); - debug.add_hw_breakpoint(&vcpu_fd, entrypoint_ptr.absolute()?)?; - - // The bellow intercepts make the vCPU exit with the Exception Intercept exit code - // Check Table 6-1. Exceptions and Interrupts at Page 6-13 Vol. 
1 - // of Intel 64 and IA-32 Architectures Software Developer's Manual - // Install intercept for #DB (1) exception - vm_fd - .install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - // Exception handler #DB (1) - intercept_parameter: hv_intercept_parameters { - exception_vector: 0x1, - }, - }) - .map_err(|e| new_error!("Cannot install debug exception intercept: {}", e))?; - - // Install intercept for #BP (3) exception - vm_fd - .install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - // Exception handler #BP (3) - intercept_parameter: hv_intercept_parameters { - exception_vector: 0x3, - }, - }) - .map_err(|e| new_error!("Cannot install breakpoint exception intercept: {}", e))?; - - (Some(debug), Some(gdb_conn)) - } else { - (None, None) - }; - - let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { - state: AtomicU8::new(0), - #[cfg(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - ))] - tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), - #[cfg(not(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - )))] - tid: AtomicU64::new(unsafe { libc::pthread_self() }), - retry_delay: config.get_interrupt_retry_delay(), - sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), - dropped: AtomicBool::new(false), - }); - - let mut hv = Self { - _mshv: mshv, - page_size: 0, - vm_fd, - vcpu_fd, - entrypoint: entrypoint_ptr.absolute()?, - orig_rsp: rsp_ptr, - interrupt_handle: interrupt_handle.clone(), - #[cfg(gdb)] - debug, - #[cfg(gdb)] - gdb_conn, - #[cfg(crashdump)] - rt_cfg, - #[cfg(feature = "mem_profile")] - trace_info, - }; - - hv.setup_initial_sregs(pml4_ptr.absolute()?)?; - - // Send the interrupt handle to the GDB thread 
if debugging is enabled - // This is used to allow the GDB thread to stop the vCPU - #[cfg(gdb)] - if hv.debug.is_some() { - hv.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?; - } - - Ok(hv) + Ok(Self { vm_fd, vcpu_fd }) } } -impl Debug for HypervLinuxDriver { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let mut f = f.debug_struct("Hyperv Linux Driver"); - - f.field("Entrypoint", &self.entrypoint) - .field("Original RSP", &self.orig_rsp); - - let regs = self.vcpu_fd.get_regs(); - - if let Ok(regs) = regs { - f.field("Registers", &regs); - } - - let sregs = self.vcpu_fd.get_sregs(); - - if let Ok(sregs) = sregs { - f.field("Special Registers", &sregs); - } - - f.finish() +impl Hypervisor for MshvVm { + fn regs(&self) -> Result { + Ok((&self.vcpu_fd.get_regs()?).into()) } -} -impl Hypervisor for HypervLinuxDriver { - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn initialise( - &mut self, - peb_addr: RawPtr, - seed: u64, - page_size: u32, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - max_guest_log_level: Option, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()> { - self.page_size = page_size as usize; - - let max_guest_log_level: u64 = match max_guest_log_level { - Some(level) => level as u64, - None => self.get_max_log_level().into(), - }; + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + Ok(self.vcpu_fd.set_regs(&regs.into())?)
+ } - let regs = StandardRegisters { - rip: self.entrypoint, - rsp: self.orig_rsp.absolute()?, - rflags: 2, //bit 1 of rlags is required to be set + fn sregs(&self) -> Result { + Ok((&self.vcpu_fd.get_sregs()?).into()) + } - // function args - rdi: peb_addr.into(), - rsi: seed, - rdx: page_size.into(), - rcx: max_guest_log_level, + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + self.vcpu_fd.set_sregs(&sregs.into())?; + Ok(()) + } - ..Default::default() - }; - self.vcpu_fd.set_regs(&regs)?; + fn fpu(&self) -> Result { + Ok((&self.vcpu_fd.get_fpu()?).into()) + } + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + self.vcpu_fd.set_fpu(&fpu.into())?; Ok(()) } + #[cfg(crashdump)] + fn xsave(&self) -> Result> { + let xsave = self.vcpu_fd.get_xsave()?; + Ok(xsave.buffer.to_vec()) + } + /// # Safety /// The caller must ensure that the memory region is valid and points to valid memory, /// and lives long enough for the VM to use it. @@ -488,30 +137,6 @@ impl Hypervisor for HypervLinuxDriver { Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()> { - // Reset general purpose registers, then set RIP and RSP - let regs = StandardRegisters { - rip: dispatch_func_addr.into(), - rsp: self.orig_rsp.absolute()?, - rflags: 2, //bit 1 of rlags is required to be set - ..Default::default() - }; - self.vcpu_fd.set_regs(&regs)?; - - // reset fpu state - self.set_fpu(&CommonFpu::default())?; - - // run - Ok(()) - } - fn run_vcpu(&mut self) -> Result { const HALT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_HALT; const IO_PORT_INTERCEPT_MESSAGE: hv_message_type = @@ -564,8 +189,6 @@ impl Hypervisor for HypervLinuxDriver { } #[cfg(gdb)] EXCEPTION_INTERCEPT => { - use mshv_bindings::DebugRegisters; - let ex_info = m .to_exception_info()
.map_err(mshv_ioctls::MshvError::from)?; @@ -586,335 +209,128 @@ impl Hypervisor for HypervLinuxDriver { }; Ok(result) } +} - fn regs(&self) -> Result { - let mshv_regs = self.vcpu_fd.get_regs()?; - Ok((&mshv_regs).into()) - } +#[cfg(gdb)] +impl DebuggableVm for MshvVm { + fn translate_gva(&self, gva: u64) -> Result { + use mshv_bindings::{HV_TRANSLATE_GVA_VALIDATE_READ, HV_TRANSLATE_GVA_VALIDATE_WRITE}; - fn set_regs(&mut self, regs: &super::regs::CommonRegisters) -> Result<()> { - let mshv_regs: StandardRegisters = regs.into(); - self.vcpu_fd.set_regs(&mshv_regs)?; - Ok(()) - } + use crate::HyperlightError; - fn fpu(&self) -> Result { - let mshv_fpu = self.vcpu_fd.get_fpu()?; - Ok((&mshv_fpu).into()) - } + let flags = (HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE) as u64; + let (addr, _) = self + .vcpu_fd + .translate_gva(gva, flags) + .map_err(|_| HyperlightError::TranslateGuestAddress(gva))?; - fn set_fpu(&mut self, fpu: &super::regs::CommonFpu) -> Result<()> { - let mshv_fpu: FloatingPointUnit = fpu.into(); - self.vcpu_fd.set_fpu(&mshv_fpu)?; - Ok(()) + Ok(addr) } - fn sregs(&self) -> Result { - let mshv_sregs = self.vcpu_fd.get_sregs()?; - Ok((&mshv_sregs).into()) - } + fn set_debug(&mut self, enabled: bool) -> Result<()> { + use mshv_bindings::{ + HV_INTERCEPT_ACCESS_MASK_EXECUTE, hv_intercept_parameters, + hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, mshv_install_intercept, + }; - fn set_sregs(&mut self, sregs: &super::regs::CommonSpecialRegisters) -> Result<()> { - let mshv_sregs: SpecialRegisters = sregs.into(); - self.vcpu_fd.set_sregs(&mshv_sregs)?; - Ok(()) - } + use crate::hypervisor::gdb::arch::{BP_EX_ID, DB_EX_ID}; - fn interrupt_handle(&self) -> Arc { - self.interrupt_handle.clone() - } + if enabled { + self.vm_fd + .install_intercept(mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, + // Exception handler #DB (1) + 
intercept_parameter: hv_intercept_parameters { + exception_vector: DB_EX_ID as u16, + }, + }) + .map_err(|e| new_error!("Cannot install debug exception intercept: {}", e))?; - fn clear_cancel(&self) { - self.interrupt_handle.clear_cancel(); + // Install intercept for #BP (3) exception + self.vm_fd + .install_intercept(mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, + // Exception handler #BP (3) + intercept_parameter: hv_intercept_parameters { + exception_vector: BP_EX_ID as u16, + }, + }) + .map_err(|e| new_error!("Cannot install breakpoint exception intercept: {}", e))?; + } else { + // There doesn't seem to be any way to remove installed intercepts. But that's okay. + } + Ok(()) } - #[cfg(crashdump)] - fn crashdump_context(&self) -> Result> { - if self.rt_cfg.guest_core_dump { - let mut regs = [0; 27]; - - let vcpu_regs = self.vcpu_fd.get_regs()?; - let sregs = self.vcpu_fd.get_sregs()?; - let xsave = self.vcpu_fd.get_xsave()?; - - // Set up the registers for the crash dump - regs[0] = vcpu_regs.r15; // r15 - regs[1] = vcpu_regs.r14; // r14 - regs[2] = vcpu_regs.r13; // r13 - regs[3] = vcpu_regs.r12; // r12 - regs[4] = vcpu_regs.rbp; // rbp - regs[5] = vcpu_regs.rbx; // rbx - regs[6] = vcpu_regs.r11; // r11 - regs[7] = vcpu_regs.r10; // r10 - regs[8] = vcpu_regs.r9; // r9 - regs[9] = vcpu_regs.r8; // r8 - regs[10] = vcpu_regs.rax; // rax - regs[11] = vcpu_regs.rcx; // rcx - regs[12] = vcpu_regs.rdx; // rdx - regs[13] = vcpu_regs.rsi; // rsi - regs[14] = vcpu_regs.rdi; // rdi - regs[15] = 0; // orig rax - regs[16] = vcpu_regs.rip; // rip - regs[17] = sregs.cs.selector as u64; // cs - regs[18] = vcpu_regs.rflags; // eflags - regs[19] = vcpu_regs.rsp; // rsp - regs[20] = sregs.ss.selector as u64; // ss - regs[21] = sregs.fs.base; // fs_base - regs[22] = sregs.gs.base; // gs_base - regs[23] = sregs.ds.selector as u64; // ds - regs[24] = sregs.es.selector as u64; // es - 
regs[25] = sregs.fs.selector as u64; // fs - regs[26] = sregs.gs.selector as u64; // gs - - // Get the filename from the binary path - let filename = self.rt_cfg.binary_path.clone().and_then(|path| { - Path::new(&path) - .file_name() - .and_then(|name| name.to_os_string().into_string().ok()) - }); - - // Include both initial sandbox regions and dynamically mapped regions - let mut regions: Vec = self.sandbox_regions.clone(); - regions.extend(self.mmap_regions.iter().cloned()); - Ok(Some(crashdump::CrashDumpContext::new( - regions, - regs, - xsave.buffer.to_vec(), - self.entrypoint, - self.rt_cfg.binary_path.clone(), - filename, - ))) + fn set_single_step(&mut self, enable: bool) -> Result<()> { + let mut regs = self.regs()?; + if enable { + regs.rflags |= 1 << 8; } else { - Ok(None) + regs.rflags &= !(1 << 8); } + self.set_regs(&regs)?; + Ok(()) } - #[cfg(gdb)] - fn handle_debug( - &mut self, - dbg_mem_access_fn: Arc>>, - stop_reason: VcpuStopReason, - ) -> Result<()> { - if self.debug.is_none() { - return Err(new_error!("Debugging is not enabled")); + fn add_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::hypervisor::gdb::arch::MAX_NO_OF_HW_BP; + + let mut debug_regs = self.vcpu_fd.get_debug_regs()?; + + // Check if breakpoint already exists + if [ + debug_regs.dr0, + debug_regs.dr1, + debug_regs.dr2, + debug_regs.dr3, + ] + .contains(&addr) + { + return Ok(()); } - let mem_access = DebugMemoryAccess { - dbg_mem_access_fn, - guest_mmap_regions: self.mmap_regions.to_vec(), - }; + // Find the first available LOCAL (L0–L3) slot + let i = (0..MAX_NO_OF_HW_BP) + .position(|i| debug_regs.dr7 & (1 << (i * 2)) == 0) + .ok_or_else(|| new_error!("Tried to add more than 4 hardware breakpoints"))?; - match stop_reason { - // If the vCPU stopped because of a crash, we need to handle it differently - // We do not want to allow resuming execution or placing breakpoints - // because the guest has crashed.
- // We only allow reading registers and memory - VcpuStopReason::Crash => { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| { - new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) - })?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - // Flag to store if we should deny continue or step requests - let mut deny_continue = false; - // Flag to store if we should detach from the gdb session - let mut detach = false; - - let response = match req { - // Allow the detach request to disable debugging by continuing resuming - // hypervisor crash error reporting - DebugMsg::DisableDebug => { - detach = true; - DebugResponse::DisableDebug - } - // Do not allow continue or step requests - DebugMsg::Continue | DebugMsg::Step => { - deny_continue = true; - DebugResponse::NotAllowed - } - // Do not allow adding/removing breakpoints and writing to memory or registers - DebugMsg::AddHwBreakpoint(_) - | DebugMsg::AddSwBreakpoint(_) - | DebugMsg::RemoveHwBreakpoint(_) - | DebugMsg::RemoveSwBreakpoint(_) - | DebugMsg::WriteAddr(_, _) - | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, - - // For all other requests, we will process them normally - _ => { - let result = self.process_dbg_request(req, &mem_access); - match result { - Ok(response) => response, - Err(HyperlightError::TranslateGuestAddress(_)) => { - // Treat non fatal errors separately so the guest doesn't fail - DebugResponse::ErrorOccurred - } - Err(e) => { - log::error!("Error processing debug request: {:?}", e); - return Err(e); - } - } - } - }; - - // Send the response to the request back to gdb - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - // If we are denying continue or step requests, the debugger assumes the - // execution started so we need to report a stop reason as a crash and let - // it request to read 
registers/memory to figure out what happened - if deny_continue { - self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash)) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - } + // Assign to corresponding debug register + *[ + &mut debug_regs.dr0, + &mut debug_regs.dr1, + &mut debug_regs.dr2, + &mut debug_regs.dr3, + ][i] = addr; - // If we are detaching, we will break the loop and the Hypervisor will continue - // to handle the Crash reason - if detach { - break; - } - } - } - // If the vCPU stopped because of any other reason except a crash, we can handle it - // normally - _ => { - // Send the stop reason to the gdb thread - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| { - new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) - })?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - let result = self.process_dbg_request(req, &mem_access); - - let response = match result { - Ok(response) => response, - // Treat non fatal errors separately so the guest doesn't fail - Err(HyperlightError::TranslateGuestAddress(_)) => { - DebugResponse::ErrorOccurred - } - Err(e) => { - return Err(e); - } - }; - - let cont = matches!( - response, - DebugResponse::Continue | DebugResponse::Step | DebugResponse::DisableDebug - ); - - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - // Check if we should continue execution - // We continue if the response is one of the following: Step, Continue, or DisableDebug - if cont { - break; - } - } - } - } + // Enable LOCAL bit + debug_regs.dr7 |= 1 << (i * 2); + self.vcpu_fd.set_debug_regs(&debug_regs)?; Ok(()) } - #[cfg(feature = "mem_profile")] - fn trace_info_mut(&mut self) -> &mut MemTraceInfo { - &mut self.trace_info - } -} - -impl Drop for HypervLinuxDriver { - #[instrument(skip_all, parent = Span::current(), level = 
"Trace")] - fn drop(&mut self) { - self.interrupt_handle.set_dropped(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - #[cfg(feature = "mem_profile")] - use crate::mem::exe::DummyUnwindInfo; - use crate::mem::memory_region::MemoryRegionVecBuilder; - use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; - - #[rustfmt::skip] - const CODE: [u8; 12] = [ - 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ - 0x00, 0xd8, /* add %bl, %al */ - 0x04, b'0', /* add $'0', %al */ - 0xee, /* out %al, (%dx) */ - /* send a 0 to indicate we're done */ - 0xb0, b'\0', /* mov $'\0', %al */ - 0xee, /* out %al, (%dx) */ - 0xf4, /* HLT */ - ]; - - fn shared_mem_with_code( - code: &[u8], - mem_size: usize, - load_offset: usize, - ) -> Result> { - if load_offset > mem_size { - log_then_return!( - "code load offset ({}) > memory size ({})", - load_offset, - mem_size - ); - } - let mut shared_mem = ExclusiveSharedMemory::new(mem_size)?; - shared_mem.copy_from_slice(code, load_offset)?; - Ok(Box::new(shared_mem)) - } - - #[test] - fn create_driver() { - if !super::is_hypervisor_present() { - return; + fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + let mut debug_regs = self.vcpu_fd.get_debug_regs()?; + + let regs = [ + &mut debug_regs.dr0, + &mut debug_regs.dr1, + &mut debug_regs.dr2, + &mut debug_regs.dr3, + ]; + + if let Some(i) = regs.iter().position(|&&mut reg| reg == addr) { + // Clear the address + *regs[i] = 0; + // Disable LOCAL bit + debug_regs.dr7 &= !(1 << (i * 2)); + self.vcpu_fd.set_debug_regs(&debug_regs)?; + Ok(()) + } else { + Err(new_error!("Tried to remove non-existing hw-breakpoint")) } - const MEM_SIZE: usize = 0x3000; - let gm = shared_mem_with_code(CODE.as_slice(), MEM_SIZE, 0).unwrap(); - let rsp_ptr = GuestPtr::try_from(0).unwrap(); - let pml4_ptr = GuestPtr::try_from(0).unwrap(); - let entrypoint_ptr = GuestPtr::try_from(0).unwrap(); - let mut regions = MemoryRegionVecBuilder::new(0, gm.base_addr()); - regions.push_page_aligned( - MEM_SIZE, - 
MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE, - crate::mem::memory_region::MemoryRegionType::Code, - ); - let config: SandboxConfiguration = Default::default(); - - super::HypervLinuxDriver::new( - entrypoint_ptr, - rsp_ptr, - pml4_ptr, - &config, - #[cfg(gdb)] - None, - #[cfg(crashdump)] - SandboxRuntimeConfig { - #[cfg(crashdump)] - binary_path: None, - #[cfg(gdb)] - debug_info: None, - #[cfg(crashdump)] - guest_core_dump: true, - }, - #[cfg(feature = "mem_profile")] - MemTraceInfo::new(Arc::new(DummyUnwindInfo {})).unwrap(), - ) - .unwrap(); } } diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index f23736063..9507de40b 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -14,535 +14,413 @@ See the License for the specific language governing permissions and limitations under the License. */ -use std::fmt; -use std::fmt::{Debug, Formatter}; -use std::string::String; -use std::sync::atomic::{AtomicBool, AtomicU8}; -use std::sync::{Arc, Mutex}; - -use log::LevelFilter; -use tracing::{Span, instrument}; -#[cfg(feature = "trace_guest")] -use tracing_opentelemetry::OpenTelemetrySpanExt; -use windows::Win32::System::Hypervisor::{WHV_MEMORY_ACCESS_TYPE, WHV_RUN_VP_EXIT_REASON}; -#[cfg(crashdump)] -use {super::crashdump, std::path::Path}; -#[cfg(gdb)] -use { - super::gdb::{ - DebugCommChannel, DebugMemoryAccess, DebugMsg, DebugResponse, GuestDebug, HypervDebug, - VcpuStopReason, - }, - crate::HyperlightError, +use std::os::raw::c_void; + +use hyperlight_common::mem::PAGE_SIZE_USIZE; +use windows::Win32::Foundation::{FreeLibrary, HANDLE}; +use windows::Win32::System::Hypervisor::*; +use windows::Win32::System::LibraryLoader::*; +use windows::core::s; +use windows_result::HRESULT; + +use super::regs::{ + Align16, WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, 
WHP_SREGS_NAMES, + WHP_SREGS_NAMES_LEN, }; - -use super::regs::CommonSpecialRegisters; use super::surrogate_process::SurrogateProcess; -use super::surrogate_process_manager::*; -use super::windows_hypervisor_platform::{VMPartition, VMProcessor}; +use super::surrogate_process_manager::get_surrogate_process_manager; use super::wrappers::HandleWrapper; -use super::{HyperlightExit, Hypervisor, InterruptHandle, VirtualCPU}; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; -use crate::hypervisor::{InterruptHandleImpl, WindowsInterruptHandle, get_memory_access_violation}; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::{GuestPtr, RawPtr}; -use crate::mem::shared_mem::HostSharedMemory; #[cfg(gdb)] -use crate::new_error; -use crate::sandbox::host_funcs::FunctionRegistry; -use crate::sandbox::outb::handle_outb; -#[cfg(feature = "mem_profile")] -use crate::sandbox::trace::MemTraceInfo; -#[cfg(crashdump)] -use crate::sandbox::uninitialized::SandboxRuntimeConfig; -use crate::{Result, debug, log_then_return}; - -#[cfg(gdb)] -mod debug { - use windows::Win32::System::Hypervisor::WHV_VP_EXCEPTION_CONTEXT; - - use super::{HypervWindowsDriver, *}; - use crate::Result; - use crate::hypervisor::gdb::{DebugMemoryAccess, DebugMsg, DebugResponse, VcpuStopReason}; - - impl HypervWindowsDriver { - /// Resets the debug information to disable debugging - fn disable_debug(&mut self) -> Result<()> { - let mut debug = HypervDebug::default(); - - debug.set_single_step(&self.processor, false)?; - - self.debug = Some(debug); - - Ok(()) +use crate::hypervisor::gdb::DebuggableVm; +use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use crate::hypervisor::{HyperlightExit, Hypervisor}; +use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; +use crate::{Result, log_then_return, new_error}; + +#[allow(dead_code)] // Will be used for runtime hypervisor detection 
+pub(crate) fn is_hypervisor_present() -> bool { + let mut capability: WHV_CAPABILITY = Default::default(); + let written_size: Option<*mut u32> = None; + + match unsafe { + WHvGetCapability( + WHvCapabilityCodeHypervisorPresent, + &mut capability as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + written_size, + ) + } { + Ok(_) => unsafe { capability.HypervisorPresent.as_bool() }, + Err(_) => { + log::info!("Windows Hypervisor Platform is not available on this system"); + false } + } +} - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - exception: WHV_VP_EXCEPTION_CONTEXT, - ) -> Result { - let debug = self - .debug - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - debug.get_stop_reason(&self.processor, exception, self.entrypoint) - } +// This function dynamically loads the WHvMapGpaRange2 function from the winhvplatform.dll +// WHvMapGpaRange2 only available on Windows 11 or Windows Server 2022 and later +// we do things this way to allow a user trying to load hyperlight on an older version of windows to +// get an error message saying that hyperlight requires a newer version of windows, rather than just failing +// with an error about a missing entrypoint +// This function should always succeed since before we get here we have already checked that the hypervisor is present and +// that we are on a supported version of windows. 
+type WHvMapGpaRange2Func = unsafe extern "C" fn( + WHV_PARTITION_HANDLE, + HANDLE, + *const c_void, + u64, + u64, + WHV_MAP_GPA_RANGE_FLAGS, +) -> HRESULT; - pub(crate) fn process_dbg_request( - &mut self, - req: DebugMsg, - mem_access: &DebugMemoryAccess, - ) -> Result { - if let Some(debug) = self.debug.as_mut() { - match req { - DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( - debug - .add_hw_breakpoint(&self.processor, addr) - .map_err(|e| { - log::error!("Failed to add hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( - debug - .add_sw_breakpoint(&self.processor, addr, mem_access) - .map_err(|e| { - log::error!("Failed to add sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Continue => { - debug.set_single_step(&self.processor, false).map_err(|e| { - log::error!("Failed to continue execution: {:?}", e); - - e - })?; - - Ok(DebugResponse::Continue) - } - DebugMsg::DisableDebug => { - self.disable_debug().map_err(|e| { - log::error!("Failed to disable debugging: {:?}", e); +/// A Hypervisor driver for HyperV-on-Windows. +#[derive(Debug)] +pub(crate) struct WhpVm { + partition: WHV_PARTITION_HANDLE, + // Surrogate process for memory mapping + surrogate_process: SurrogateProcess, + // Offset between surrogate process and host process addresses (accounting for guard page) + // Calculated lazily on first map_memory call + surrogate_offset: Option, + // Track if initial memory setup is complete. + // Used to reject later memory mapping since it's not supported on windows. + // TODO remove this flag once memory mapping is supported on windows. 
+ initial_memory_setup_done: bool, +} - e - })?; +unsafe impl Send for WhpVm {} +unsafe impl Sync for WhpVm {} + +impl WhpVm { + pub(crate) fn new(mmap_file_handle: HandleWrapper, raw_size: usize) -> Result { + const NUM_CPU: u32 = 1; + let partition = unsafe { + let partition = WHvCreatePartition()?; + WHvSetPartitionProperty( + partition, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + )?; + WHvSetupPartition(partition)?; + WHvCreateVirtualProcessor(partition, 0, 0)?; + partition + }; - Ok(DebugResponse::DisableDebug) - } - DebugMsg::GetCodeSectionOffset => { - let offset = mem_access - .dbg_mem_access_fn - .try_lock() - .map_err(|e| { - new_error!("Error locking at {}:{}: {}", file!(), line!(), e) - })? - .layout - .get_guest_code_address(); - - Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) - } - DebugMsg::ReadAddr(addr, len) => { - let mut data = vec![0u8; len]; + // Create the surrogate process with the total memory size + let mgr = get_surrogate_process_manager()?; + let surrogate_process = mgr.get_surrogate_process(raw_size, mmap_file_handle)?; - debug - .read_addrs(&self.processor, addr, &mut data, mem_access) - .map_err(|e| { - log::error!("Failed to read from address: {:?}", e); + Ok(WhpVm { + partition, + surrogate_process, + surrogate_offset: None, + initial_memory_setup_done: false, + }) + } - e - })?; + /// Helper for setting arbitrary registers. 
+ fn set_registers(&self, registers: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]) -> Result<()> { + let register_count = registers.len(); - Ok(DebugResponse::ReadAddr(data)) - } - DebugMsg::ReadRegisters => debug - .read_regs(&self.processor) - .map_err(|e| { - log::error!("Failed to read registers: {:?}", e); - - e - }) - .map(|(regs, fpu)| DebugResponse::ReadRegisters(Box::new((regs, fpu)))), - DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( - debug - .remove_hw_breakpoint(&self.processor, addr) - .map_err(|e| { - log::error!("Failed to remove hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( - debug - .remove_sw_breakpoint(&self.processor, addr, mem_access) - .map_err(|e| { - log::error!("Failed to remove sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Step => { - debug.set_single_step(&self.processor, true).map_err(|e| { - log::error!("Failed to enable step instruction: {:?}", e); - - e - })?; - - Ok(DebugResponse::Step) - } - DebugMsg::WriteAddr(addr, data) => { - debug - .write_addrs(&self.processor, addr, &data, mem_access) - .map_err(|e| { - log::error!("Failed to write to address: {:?}", e); + // Prepare register names (no special alignment needed) + let mut register_names = Vec::with_capacity(register_count); + let mut register_values = Vec::with_capacity(register_count); - e - })?; + for (key, value) in registers.iter() { + register_names.push(*key); + register_values.push(Align16(*value)); + } - Ok(DebugResponse::WriteAddr) - } - DebugMsg::WriteRegisters(boxed_regs) => { - let (regs, fpu) = boxed_regs.as_ref(); - debug - .write_regs(&self.processor, regs, fpu) - .map_err(|e| { - log::error!("Failed to write registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::WriteRegisters) - } - } - } else { - Err(new_error!("Debugging is not enabled")) - } + unsafe { + WHvSetVirtualProcessorRegisters( + self.partition, + 0, + 
register_names.as_ptr(), + register_count as u32, + register_values.as_ptr() as *const WHV_REGISTER_VALUE, + )?; } - pub(crate) fn recv_dbg_msg(&mut self) -> Result { - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; + Ok(()) + } +} + +impl Hypervisor for WhpVm { + /// Get the partition handle for this VM + fn partition_handle(&self) -> WHV_PARTITION_HANDLE { + self.partition + } + fn regs(&self) -> Result { + let mut whv_regs_values: [Align16; WHP_REGS_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_REGS_NAMES.as_ptr(), + whv_regs_values.len() as u32, + whv_regs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } - gdb_conn.recv().map_err(|e| { + WHP_REGS_NAMES + .into_iter() + .zip(whv_regs_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| { new_error!( - "Got an error while waiting to receive a - message: {:?}", + "Failed to convert WHP registers to CommonRegisters: {:?}", e ) }) - } - - pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { - log::debug!("Sending {:?}", cmd); - - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn - .send(cmd) - .map_err(|e| new_error!("Got an error while sending a response message {:?}", e)) - } } -} - -/// A Hypervisor driver for HyperV-on-Windows. 
-pub(crate) struct HypervWindowsDriver { - processor: VMProcessor, - _surrogate_process: SurrogateProcess, // we need to keep a reference to the SurrogateProcess for the duration of the driver since otherwise it will dropped and the memory mapping will be unmapped and the surrogate process will be returned to the pool - entrypoint: u64, - orig_rsp: GuestPtr, - interrupt_handle: Arc, - - sandbox_regions: Vec, // Initially mapped regions when sandbox is created - mmap_regions: Vec, // Later mapped regions - - #[cfg(gdb)] - debug: Option, - #[cfg(gdb)] - gdb_conn: Option>, - #[cfg(crashdump)] - rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] - trace_info: MemTraceInfo, -} -/* This does not automatically impl Send because the host - * address of the shared memory region is a raw pointer, which are - * marked as !Send (and !Sync). However, the access patterns used - * here are safe. - */ -unsafe impl Send for HypervWindowsDriver {} - -impl HypervWindowsDriver { - #[allow(clippy::too_many_arguments)] - // TODO: refactor this function to take fewer arguments. 
Add trace_info to rt_cfg - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn new( - mem_regions: Vec, - raw_size: usize, - pml4_address: u64, - entrypoint: u64, - rsp: u64, - mmap_file_handle: HandleWrapper, - #[cfg(gdb)] gdb_conn: Option>, - #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] trace_info: MemTraceInfo, - ) -> Result { - // create and setup hypervisor partition - let mut partition = VMPartition::new(1)?; - - // get a surrogate process with preallocated memory of size SharedMemory::raw_mem_size() - // with guard pages setup - let surrogate_process = { - let mgr = get_surrogate_process_manager()?; - mgr.get_surrogate_process(raw_size, mmap_file_handle) - }?; - - partition.map_gpa_range(&mem_regions, &surrogate_process)?; - - let proc = VMProcessor::new(partition)?; - let partition_handle = proc.get_partition_hdl(); - #[cfg(gdb)] - let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn { - let mut debug = HypervDebug::new(); - debug.add_hw_breakpoint(&proc, entrypoint)?; + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_REGS_NAMES_LEN] = + regs.into(); + let whp_regs_unaligned: Vec<(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)> = whp_regs + .iter() + .map(|(name, value)| (*name, value.0)) + .collect(); + self.set_registers(&whp_regs_unaligned)?; + Ok(()) + } - (Some(debug), Some(gdb_conn)) - } else { - (None, None) - }; + fn sregs(&self) -> Result { + let mut whp_sregs_values: [Align16; WHP_SREGS_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_SREGS_NAMES.as_ptr(), + whp_sregs_values.len() as u32, + whp_sregs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } - let interrupt_handle = Arc::new(WindowsInterruptHandle { - state: AtomicU8::new(0), - partition_handle, - dropped: AtomicBool::new(false), - }); - - let mut hv = Self { - processor: 
proc, - _surrogate_process: surrogate_process, - entrypoint, - orig_rsp: GuestPtr::try_from(RawPtr::from(rsp))?, - interrupt_handle: interrupt_handle.clone(), - sandbox_regions: mem_regions, - mmap_regions: Vec::new(), - #[cfg(gdb)] - debug, - #[cfg(gdb)] - gdb_conn, - #[cfg(crashdump)] - rt_cfg, - #[cfg(feature = "mem_profile")] - trace_info, - }; + WHP_SREGS_NAMES + .into_iter() + .zip(whp_sregs_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| { + new_error!( + "Failed to convert WHP registers to CommonSpecialRegisters: {:?}", + e + ) + }) + } - hv.setup_initial_sregs(pml4_address)?; + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = + sregs.into(); + let whp_regs_unaligned: Vec<(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)> = whp_regs + .iter() + .map(|(name, value)| (*name, value.0)) + .collect(); + self.set_registers(&whp_regs_unaligned)?; + Ok(()) + } - // Send the interrupt handle to the GDB thread if debugging is enabled - // This is used to allow the GDB thread to stop the vCPU - #[cfg(gdb)] - if hv.debug.is_some() { - hv.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?; + fn fpu(&self) -> Result { + let mut whp_fpu_values: [Align16; WHP_FPU_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_FPU_NAMES.as_ptr(), + whp_fpu_values.len() as u32, + whp_fpu_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; } - Ok(hv) + WHP_FPU_NAMES + .into_iter() + .zip(whp_fpu_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| new_error!("Failed to convert WHP registers to CommonFpu: {:?}", e)) } - #[inline] - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn get_exit_details(&self, exit_reason: WHV_RUN_VP_EXIT_REASON) -> Result { - let mut error = String::new(); - error.push_str(&format!( - "Did not receive a halt from Hypervisor as 
expected - Received {exit_reason:?}!\n" - )); - error.push_str(&format!("Registers: \n{:#?}", self.processor.regs()?)); - Ok(error) + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + let whp_fpu: [(WHV_REGISTER_NAME, Align16); WHP_FPU_NAMES_LEN] = + fpu.into(); + let whp_fpu_unaligned: Vec<(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)> = whp_fpu + .iter() + .map(|(name, value)| (*name, value.0)) + .collect(); + self.set_registers(&whp_fpu_unaligned)?; + Ok(()) } -} -impl Debug for HypervWindowsDriver { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let mut fs = f.debug_struct("HyperV Driver"); - - fs.field("Entrypoint", &self.entrypoint) - .field("Original RSP", &self.orig_rsp); + #[cfg(crashdump)] + fn xsave(&self) -> Result> { + use crate::HyperlightError; + + // Get the required buffer size by calling with NULL buffer. + // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns + // WHV_E_INSUFFICIENT_BUFFER and sets buffer_size_needed to the required size. 
+ let mut buffer_size_needed: u32 = 0; + + let result = unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + std::ptr::null_mut(), + 0, + &mut buffer_size_needed, + ) + }; - for region in &self.sandbox_regions { - fs.field("Sandbox Memory Region", &region); - } - for region in &self.mmap_regions { - fs.field("Mapped Memory Region", &region); + // Expect insufficient buffer error; any other error is unexpected + if let Err(e) = result + && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER + { + return Err(HyperlightError::WindowsAPIError(e)); } - // Get the registers - if let Ok(regs) = self.processor.regs() { - fs.field("Registers", &regs); - } + // Allocate buffer with the required size + let mut xsave_buffer = vec![0u8; buffer_size_needed as usize]; + let mut written_bytes = 0; + + // Get the actual Xsave state + unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + xsave_buffer.as_mut_ptr() as *mut std::ffi::c_void, + buffer_size_needed, + &mut written_bytes, + ) + }?; - // Get the special registers - if let Ok(special_regs) = self.processor.sregs() { - fs.field("SpecialRegisters", &special_regs); + // Verify the number of written bytes matches the expected size + if written_bytes != buffer_size_needed { + return Err(new_error!( + "Failed to get Xsave state: expected {} bytes, got {}", + buffer_size_needed, + written_bytes + )); } - fs.finish() + Ok(xsave_buffer) } -} -impl Hypervisor for HypervWindowsDriver { - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn initialise( - &mut self, - peb_address: RawPtr, - seed: u64, - page_size: u32, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - max_guest_log_level: Option, - #[cfg(gdb)] dbg_mem_access_hdl: Arc>>, - ) -> Result<()> { - let max_guest_log_level: u64 = match max_guest_log_level { - Some(level) => level as u64, - None => self.get_max_log_level().into(), + unsafe fn map_memory(&mut self, (_slot, region): (u32,
&MemoryRegion)) -> Result<()> { + // Only allow memory mapping during initial setup (the first batch of regions). + // After the initial setup is complete, subsequent calls should fail, + // since it's not yet implemented. + if self.initial_memory_setup_done { + // Initial setup already completed - reject this mapping + log_then_return!( + "Mapping host memory into the guest not yet supported on this platform" + ); + } + + // Calculate the offset on first call. The offset accounts for the guard page + // at the start of the surrogate process memory. + let offset = if let Some(offset) = self.surrogate_offset { + offset + } else { + // surrogate_address points to the start of the guard page, so add PAGE_SIZE + // to get to the actual shared memory start + let surrogate_address = + self.surrogate_process.allocated_address as usize + PAGE_SIZE_USIZE; + let host_address = region.host_region.start; + let offset = isize::try_from(surrogate_address)? - isize::try_from(host_address)?; + self.surrogate_offset = Some(offset); + offset }; - let regs = CommonRegisters { - rip: self.entrypoint, - rsp: self.orig_rsp.absolute()?, + let process_handle: HANDLE = self.surrogate_process.process_handle.into(); - // function args - rdi: peb_address.into(), - rsi: seed, - rdx: page_size.into(), - rcx: max_guest_log_level, - rflags: 1 << 1, // eflags bit index 1 is reserved and always needs to be 1 + let whvmapgparange2_func = unsafe { + match try_load_whv_map_gpa_range2() { + Ok(func) => func, + Err(e) => return Err(new_error!("Can't find API: {}", e)), + } + }; - ..Default::default() + let flags = region + .flags + .iter() + .map(|flag| match flag { + MemoryRegionFlags::NONE => Ok(WHvMapGpaRangeFlagNone), + MemoryRegionFlags::READ => Ok(WHvMapGpaRangeFlagRead), + MemoryRegionFlags::WRITE => Ok(WHvMapGpaRangeFlagWrite), + MemoryRegionFlags::EXECUTE => Ok(WHvMapGpaRangeFlagExecute), + MemoryRegionFlags::STACK_GUARD => Ok(WHvMapGpaRangeFlagNone), + _ => Err(new_error!("Invalid Memory 
Region Flag")), + }) + .collect::>>()? + .iter() + .fold(WHvMapGpaRangeFlagNone, |acc, flag| acc | *flag); + + // Calculate the surrogate process address for this region + let surrogate_addr = (isize::try_from(region.host_region.start)? + offset) as *const c_void; + + let res = unsafe { + whvmapgparange2_func( + self.partition, + process_handle, + surrogate_addr, + region.guest_region.start as u64, + region.guest_region.len() as u64, + flags, + ) }; - self.set_regs(®s)?; + if res.is_err() { + return Err(new_error!("Call to WHvMapGpaRange2 failed")); + } Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - unsafe fn map_region(&mut self, _region: &MemoryRegion) -> Result<()> { + fn unmap_memory(&mut self, (_slot, _region): (u32, &MemoryRegion)) -> Result<()> { log_then_return!("Mapping host memory into the guest not yet supported on this platform"); } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - unsafe fn unmap_region(&mut self, _region: &MemoryRegion) -> Result<()> { - log_then_return!("Mapping host memory into the guest not yet supported on this platform"); - } - - fn get_mapped_regions(&self) -> Box + '_> { - Box::new(self.mmap_regions.iter()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - #[cfg(gdb)] dbg_mem_access_hdl: Arc>>, - ) -> Result<()> { - // Reset general purpose registers, then set RIP and RSP - let regs = CommonRegisters { - rip: dispatch_func_addr.into(), - rsp: self.orig_rsp.absolute()?, - rflags: 1 << 1, // eflags bit index 1 is reserved and always needs to be 1 - ..Default::default() - }; - self.processor.set_regs(®s)?; - - // reset fpu state - self.processor.set_fpu(&CommonFpu::default())?; - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn 
handle_io( - &mut self, - port: u16, - data: Vec, - rip: u64, - instruction_length: u64, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - ) -> Result<()> { - let mut padded = [0u8; 4]; - let copy_len = data.len().min(4); - padded[..copy_len].copy_from_slice(&data[..copy_len]); - let val = u32::from_le_bytes(padded); - - #[cfg(feature = "mem_profile")] - { - let regs = self.regs()?; - let trace_info = self.trace_info_mut(); - handle_outb(mem_mgr, host_funcs, port, val, ®s, trace_info)?; + #[expect(non_upper_case_globals, reason = "Windows API constant are lower case")] + fn run_vcpu(&mut self) -> Result { + let mut exit_context: WHV_RUN_VP_EXIT_CONTEXT = Default::default(); + + unsafe { + WHvRunVirtualProcessor( + self.partition, + 0, + &mut exit_context as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + )?; } - #[cfg(not(feature = "mem_profile"))] - { - handle_outb(mem_mgr, host_funcs, port, val)?; - } - - let mut regs = self.regs()?; - regs.rip = rip + instruction_length; - self.set_regs(®s) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn run( - &mut self, - #[cfg(feature = "trace_guest")] tc: &mut crate::sandbox::trace::TraceContext, - ) -> Result { - #[cfg(feature = "trace_guest")] - tc.setup_guest_trace(Span::current().context()); - - let exit_context = self.processor.run()?; let result = match exit_context.ExitReason { - // WHvRunVpExitReasonX64IoPortAccess - WHV_RUN_VP_EXIT_REASON(2i32) => { - // size of current instruction is in lower byte of _bitfield - // see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvexitcontextdatatypes) + WHvRunVpExitReasonX64IoPortAccess => unsafe { let instruction_length = exit_context.VpContext._bitfield & 0xF; - unsafe { - debug!( - "HyperV IO Details :\n Port: {:#x} \n {:#?}", - exit_context.Anonymous.IoPortAccess.PortNumber, &self - ); - HyperlightExit::IoOut( - exit_context.Anonymous.IoPortAccess.PortNumber, - exit_context - 
.Anonymous - .IoPortAccess - .Rax - .to_le_bytes() - .to_vec(), - exit_context.VpContext.Rip, - instruction_length as u64, - ) - } - } - // HvRunVpExitReasonX64Halt - WHV_RUN_VP_EXIT_REASON(8i32) => { - debug!("HyperV Halt Details :\n {:#?}", &self); - HyperlightExit::Halt() - } - // WHvRunVpExitReasonMemoryAccess - WHV_RUN_VP_EXIT_REASON(1i32) => { + let rip = exit_context.VpContext.Rip + instruction_length as u64; + self.set_registers(&[(WHvX64RegisterRip, WHV_REGISTER_VALUE { Reg64: rip })])?; + HyperlightExit::IoOut( + exit_context.Anonymous.IoPortAccess.PortNumber, + exit_context + .Anonymous + .IoPortAccess + .Rax + .to_le_bytes() + .to_vec(), + ) + }, + WHvRunVpExitReasonX64Halt => HyperlightExit::Halt(), + WHvRunVpExitReasonMemoryAccess => { let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; let access_info = unsafe { WHV_MEMORY_ACCESS_TYPE( @@ -551,299 +429,265 @@ impl Hypervisor for HypervWindowsDriver { ) }; let access_info = MemoryRegionFlags::try_from(access_info)?; - debug!( - "HyperV Memory Access Details :\n GPA: {:#?}\n Access Info :{:#?}\n {:#?} ", - gpa, access_info, &self - ); - - match get_memory_access_violation( - gpa as usize, - self.sandbox_regions.iter().chain(self.mmap_regions.iter()), - access_info, - ) { - Some(access_info) => access_info, - None => HyperlightExit::Mmio(gpa), + match access_info { + MemoryRegionFlags::READ => HyperlightExit::MmioRead(gpa), + MemoryRegionFlags::WRITE => HyperlightExit::MmioWrite(gpa), + _ => HyperlightExit::Unknown("Unknown memory access type".to_string()), } } - // WHvRunVpExitReasonCanceled - // Execution was cancelled by the host. - WHV_RUN_VP_EXIT_REASON(8193i32) => { - debug!("HyperV Cancelled Details :\n {:#?}", &self); - - HyperlightExit::Cancelled() - } + // Execution was cancelled by the host. 
+ WHvRunVpExitReasonCanceled => HyperlightExit::Cancelled(), #[cfg(gdb)] - WHV_RUN_VP_EXIT_REASON(4098i32) => { - // Get information about the exception that triggered the exit + WHvRunVpExitReasonException => { let exception = unsafe { exit_context.Anonymous.VpException }; - match self.get_stop_reason(exception) { - Ok(reason) => HyperlightExit::Debug(reason), - Err(e) => { - log_then_return!("Error getting stop reason: {}", e); + // Get the DR6 register to see which breakpoint was hit + let dr6 = { + let names = [WHvX64RegisterDr6]; + let mut out: [Align16; 1] = unsafe { std::mem::zeroed() }; + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + names.as_ptr(), + 1, + out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; } + unsafe { out[0].0.Reg64 } + }; + + HyperlightExit::Debug { + dr6, + exception: exception.ExceptionType as u32, } } - WHV_RUN_VP_EXIT_REASON(_) => { - debug!( - "HyperV Unexpected Exit Details :#nReason {:#?}\n {:#?}", - exit_context.ExitReason, &self - ); - match self.get_exit_details(exit_context.ExitReason) { - Ok(error) => HyperlightExit::Unknown(error), - Err(e) => HyperlightExit::Unknown(format!("Error getting exit details: {}", e)), - } - } + WHV_RUN_VP_EXIT_REASON(_) => HyperlightExit::Unknown(format!( + "Unknown exit reason '{}'", + exit_context.ExitReason.0 + )), }; - Ok(result) } - /// Get regs - #[allow(dead_code)] - fn regs(&self) -> Result { - self.processor.regs() - } - /// Set regs - fn set_regs(&mut self, regs: &CommonRegisters) -> Result<()> { - self.processor.set_regs(regs) - } - /// Get fpu regs - #[allow(dead_code)] - fn fpu(&self) -> Result { - self.processor.fpu() - } - /// Set fpu regs - fn set_fpu(&mut self, fpu: &CommonFpu) -> Result<()> { - self.processor.set_fpu(fpu) - } - /// Get special regs - #[allow(dead_code)] - fn sregs(&self) -> Result { - self.processor.sregs() - } - /// Set special regs - #[allow(dead_code)] - fn set_sregs(&mut self, sregs: &CommonSpecialRegisters) -> Result<()> { - 
self.processor.set_sregs(sregs) + /// Mark that initial memory setup is complete. After this, map_memory will fail. + fn complete_initial_memory_setup(&mut self) { + self.initial_memory_setup_done = true; } +} + +#[cfg(gdb)] +impl DebuggableVm for WhpVm { + fn translate_gva(&self, gva: u64) -> Result { + let mut gpa = 0; + let mut result = WHV_TRANSLATE_GVA_RESULT::default(); + + // Only validate read access because the write access is handled through the + // host memory mapping + let translateflags = WHvTranslateGvaFlagValidateRead; + + unsafe { + WHvTranslateGva( + self.partition, + 0, + gva, + translateflags, + &mut result, + &mut gpa, + )?; + } - fn interrupt_handle(&self) -> Arc { - self.interrupt_handle.clone() + Ok(gpa) } - fn clear_cancel(&self) { - self.interrupt_handle.clear_cancel(); + fn set_debug(&mut self, enable: bool) -> Result<()> { + if enable { + // Set the extended VM exits property to enable extended VM exits + let mut property: WHV_PARTITION_PROPERTY = Default::default(); + property.ExtendedVmExits.AsUINT64 = 1 << 2; // EXTENDED_VM_EXIT_POS + + unsafe { + WHvSetPartitionProperty( + self.partition, + WHvPartitionPropertyCodeExtendedVmExits, + &property as *const _ as *const c_void, + std::mem::size_of::() as u32, + )?; + } + + // Set the exception exit bitmap to include debug trap and breakpoint trap + let mut exception_property: WHV_PARTITION_PROPERTY = Default::default(); + exception_property.ExceptionExitBitmap = (1 << WHvX64ExceptionTypeDebugTrapOrFault.0) + | (1 << WHvX64ExceptionTypeBreakpointTrap.0); + + unsafe { + WHvSetPartitionProperty( + self.partition, + WHvPartitionPropertyCodeExceptionExitBitmap, + &exception_property as *const _ as *const c_void, + std::mem::size_of::() as u32, + )?; + } + } + Ok(()) } - #[cfg(crashdump)] - fn crashdump_context(&self) -> Result> { - if self.rt_cfg.guest_core_dump { - let mut regs = [0; 27]; - - let vcpu_regs = self.processor.regs()?; - let sregs = self.processor.sregs()?; - let xsave = 
self.processor.get_xsave()?; - - // Set the registers in the order expected by the crashdump context - regs[0] = vcpu_regs.r15; // r15 - regs[1] = vcpu_regs.r14; // r14 - regs[2] = vcpu_regs.r13; // r13 - regs[3] = vcpu_regs.r12; // r12 - regs[4] = vcpu_regs.rbp; // rbp - regs[5] = vcpu_regs.rbx; // rbx - regs[6] = vcpu_regs.r11; // r11 - regs[7] = vcpu_regs.r10; // r10 - regs[8] = vcpu_regs.r9; // r9 - regs[9] = vcpu_regs.r8; // r8 - regs[10] = vcpu_regs.rax; // rax - regs[11] = vcpu_regs.rcx; // rcx - regs[12] = vcpu_regs.rdx; // rdx - regs[13] = vcpu_regs.rsi; // rsi - regs[14] = vcpu_regs.rdi; // rdi - regs[15] = 0; // orig rax - regs[16] = vcpu_regs.rip; // rip - regs[17] = sregs.cs.selector as u64; // cs - regs[18] = vcpu_regs.rflags; // eflags - regs[19] = vcpu_regs.rsp; // rsp - regs[20] = sregs.ss.selector as u64; // ss - regs[21] = sregs.fs.base; // fs_base - regs[22] = sregs.gs.base; // gs_base - regs[23] = sregs.ds.selector as u64; // ds - regs[24] = sregs.es.selector as u64; // es - regs[25] = sregs.fs.selector as u64; // fs - regs[26] = sregs.gs.selector as u64; // gs - - // Get the filename from the config - let filename = self.rt_cfg.binary_path.clone().and_then(|path| { - Path::new(&path) - .file_name() - .and_then(|name| name.to_os_string().into_string().ok()) - }); - - // Include both initial sandbox regions and dynamically mapped regions - let mut regions: Vec = self.sandbox_regions.clone(); - regions.extend(self.mmap_regions.iter().cloned()); - Ok(Some(crashdump::CrashDumpContext::new( - regions, - regs, - xsave, - self.entrypoint, - self.rt_cfg.binary_path.clone(), - filename, - ))) + fn set_single_step(&mut self, enable: bool) -> Result<()> { + let mut regs = self.regs()?; + if enable { + regs.rflags |= 1 << 8; } else { - Ok(None) + regs.rflags &= !(1 << 8); } + self.set_regs(®s)?; + Ok(()) } - #[cfg(gdb)] - fn handle_debug( - &mut self, - dbg_mem_access_fn: Arc>>, - stop_reason: super::gdb::VcpuStopReason, - ) -> Result<()> { - if 
self.debug.is_none() { - return Err(new_error!("Debugging is not enabled")); + fn add_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::hypervisor::gdb::arch::MAX_NO_OF_HW_BP; + + // Get current debug registers + const LEN: usize = 6; + let names: [WHV_REGISTER_NAME; LEN] = [ + WHvX64RegisterDr0, + WHvX64RegisterDr1, + WHvX64RegisterDr2, + WHvX64RegisterDr3, + WHvX64RegisterDr6, + WHvX64RegisterDr7, + ]; + + let mut out: [Align16; LEN] = unsafe { std::mem::zeroed() }; + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + names.as_ptr(), + LEN as u32, + out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; } - let mem_access = DebugMemoryAccess { - dbg_mem_access_fn, - guest_mmap_regions: self.mmap_regions.to_vec(), - }; - - match stop_reason { - // If the vCPU stopped because of a crash, we need to handle it differently - // We do not want to allow resuming execution or placing breakpoints - // because the guest has crashed. - // We only allow reading registers and memory - VcpuStopReason::Crash => { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| { - new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) - })?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - // Flag to store if we should deny continue or step requests - let mut deny_continue = false; - // Flag to store if we should detach from the gdb session - let mut detach = false; - - let response = match req { - // Allow the detach request to disable debugging by continuing resuming - // hypervisor crash error reporting - DebugMsg::DisableDebug => { - detach = true; - DebugResponse::DisableDebug - } - // Do not allow continue or step requests - DebugMsg::Continue | DebugMsg::Step => { - deny_continue = true; - DebugResponse::NotAllowed - } - // Do not allow adding/removing breakpoints and writing to memory or registers - DebugMsg::AddHwBreakpoint(_) 
- | DebugMsg::AddSwBreakpoint(_) - | DebugMsg::RemoveHwBreakpoint(_) - | DebugMsg::RemoveSwBreakpoint(_) - | DebugMsg::WriteAddr(_, _) - | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, - - // For all other requests, we will process them normally - _ => { - let result = self.process_dbg_request(req, &mem_access); - match result { - Ok(response) => response, - Err(HyperlightError::TranslateGuestAddress(_)) => { - // Treat non fatal errors separately so the guest doesn't fail - DebugResponse::ErrorOccurred - } - Err(e) => { - log::error!("Error processing debug request: {:?}", e); - return Err(e); - } - } - } - }; - - // Send the response to the request back to gdb - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - // If we are denying continue or step requests, the debugger assumes the - // execution started so we need to report a stop reason as a crash and let - // it request to read registers/memory to figure out what happened - if deny_continue { - self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash)) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - } - - // If we are detaching, we will break the loop and the Hypervisor will continue - // to handle the Crash reason - if detach { - break; - } - } - } + let mut dr0 = unsafe { out[0].0.Reg64 }; + let mut dr1 = unsafe { out[1].0.Reg64 }; + let mut dr2 = unsafe { out[2].0.Reg64 }; + let mut dr3 = unsafe { out[3].0.Reg64 }; + let mut dr7 = unsafe { out[5].0.Reg64 }; - // If the vCPU stopped because of any other reason except a crash, we can handle it - // normally - _ => { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| { - new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) - })?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - let result = self.process_dbg_request(req, 
&mem_access); - - let response = match result { - Ok(response) => response, - // Treat non fatal errors separately so the guest doesn't fail - Err(HyperlightError::TranslateGuestAddress(_)) => { - DebugResponse::ErrorOccurred - } - Err(e) => { - return Err(e); - } - }; - - // If the command was either step or continue, we need to run the vcpu - let cont = matches!( - response, - DebugResponse::Step | DebugResponse::Continue | DebugResponse::DisableDebug - ); - - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - if cont { - break; - } - } - } + // Check if breakpoint already exists + if [dr0, dr1, dr2, dr3].contains(&addr) { + return Ok(()); } + // Find the first available LOCAL (L0–L3) slot + let i = (0..MAX_NO_OF_HW_BP) + .position(|i| dr7 & (1 << (i * 2)) == 0) + .ok_or_else(|| new_error!("Tried to add more than 4 hardware breakpoints"))?; + + // Assign to corresponding debug register + *[&mut dr0, &mut dr1, &mut dr2, &mut dr3][i] = addr; + + // Enable LOCAL bit + dr7 |= 1 << (i * 2); + + // Set the debug registers + let registers = vec![ + (WHvX64RegisterDr0, WHV_REGISTER_VALUE { Reg64: dr0 }), + (WHvX64RegisterDr1, WHV_REGISTER_VALUE { Reg64: dr1 }), + (WHvX64RegisterDr2, WHV_REGISTER_VALUE { Reg64: dr2 }), + (WHvX64RegisterDr3, WHV_REGISTER_VALUE { Reg64: dr3 }), + (WHvX64RegisterDr7, WHV_REGISTER_VALUE { Reg64: dr7 }), + ]; + self.set_registers(®isters)?; Ok(()) } - #[cfg(feature = "mem_profile")] - fn trace_info_mut(&mut self) -> &mut MemTraceInfo { - &mut self.trace_info + fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + // Get current debug registers + const LEN: usize = 6; + let names: [WHV_REGISTER_NAME; LEN] = [ + WHvX64RegisterDr0, + WHvX64RegisterDr1, + WHvX64RegisterDr2, + WHvX64RegisterDr3, + WHvX64RegisterDr6, + WHvX64RegisterDr7, + ]; + + let mut out: [Align16; LEN] = unsafe { std::mem::zeroed() }; + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + 
names.as_ptr(), + LEN as u32, + out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } + + let mut dr0 = unsafe { out[0].0.Reg64 }; + let mut dr1 = unsafe { out[1].0.Reg64 }; + let mut dr2 = unsafe { out[2].0.Reg64 }; + let mut dr3 = unsafe { out[3].0.Reg64 }; + let mut dr7 = unsafe { out[5].0.Reg64 }; + + let regs = [&mut dr0, &mut dr1, &mut dr2, &mut dr3]; + + if let Some(i) = regs.iter().position(|&&mut reg| reg == addr) { + // Clear the address + *regs[i] = 0; + // Disable LOCAL bit + dr7 &= !(1 << (i * 2)); + + // Set the debug registers + let registers = vec![ + (WHvX64RegisterDr0, WHV_REGISTER_VALUE { Reg64: dr0 }), + (WHvX64RegisterDr1, WHV_REGISTER_VALUE { Reg64: dr1 }), + (WHvX64RegisterDr2, WHV_REGISTER_VALUE { Reg64: dr2 }), + (WHvX64RegisterDr3, WHV_REGISTER_VALUE { Reg64: dr3 }), + (WHvX64RegisterDr7, WHV_REGISTER_VALUE { Reg64: dr7 }), + ]; + self.set_registers(®isters)?; + Ok(()) + } else { + Err(new_error!("Tried to remove non-existing hw-breakpoint")) + } } } -impl Drop for HypervWindowsDriver { +impl Drop for WhpVm { fn drop(&mut self) { - self.interrupt_handle.set_dropped(); + if let Err(e) = unsafe { WHvDeletePartition(self.partition) } { + log::error!("Failed to delete partition: {}", e); + } + } +} + +unsafe fn try_load_whv_map_gpa_range2() -> Result { + let library = unsafe { + LoadLibraryExA( + s!("winhvplatform.dll"), + None, + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, + ) + }; + + if let Err(e) = library { + return Err(new_error!("{}", e)); + } + + #[allow(clippy::unwrap_used)] + // We know this will succeed because we just checked for an error above + let library = library.unwrap(); + + let address = unsafe { GetProcAddress(library, s!("WHvMapGpaRange2")) }; + + if address.is_none() { + unsafe { FreeLibrary(library)? 
}; + return Err(new_error!( + "Failed to find WHvMapGpaRange2 in winhvplatform.dll" + )); } + + unsafe { Ok(std::mem::transmute_copy(&address)) } } diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index 51fc6b0e7..d80c100a4 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -14,45 +14,23 @@ See the License for the specific language governing permissions and limitations under the License. */ -use std::fmt::Debug; -use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64}; -use std::sync::{Arc, Mutex}; +use std::sync::LazyLock; -use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region}; +#[cfg(gdb)] +use kvm_bindings::kvm_guest_debug; +use kvm_bindings::kvm_userspace_memory_region; use kvm_ioctls::Cap::UserMemory; use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; -use log::LevelFilter; use tracing::{Span, instrument}; -#[cfg(feature = "trace_guest")] -use tracing_opentelemetry::OpenTelemetrySpanExt; -#[cfg(crashdump)] -use {super::crashdump, std::path::Path}; #[cfg(gdb)] -use super::gdb::{ - DebugCommChannel, DebugMemoryAccess, DebugMsg, DebugResponse, GuestDebug, KvmDebug, - VcpuStopReason, -}; -use super::{Hypervisor, LinuxInterruptHandle}; -#[cfg(gdb)] -use crate::HyperlightError; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters}; -use crate::hypervisor::{HyperlightExit, InterruptHandle, InterruptHandleImpl}; +use crate::hypervisor::gdb::DebuggableVm; +use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use crate::hypervisor::{HyperlightExit, Hypervisor}; use crate::mem::memory_region::MemoryRegion; -use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::{GuestPtr, RawPtr}; -use crate::mem::shared_mem::HostSharedMemory; -use crate::sandbox::SandboxConfiguration; -use crate::sandbox::host_funcs::FunctionRegistry; -use crate::sandbox::outb::handle_outb; -#[cfg(feature = "mem_profile")] -use 
crate::sandbox::trace::MemTraceInfo; -#[cfg(crashdump)] -use crate::sandbox::uninitialized::SandboxRuntimeConfig; -use crate::{Result, log_then_return, new_error}; +use crate::{Result, new_error}; /// Return `true` if the KVM API is available, version 12, and has UserMemory capability, or `false` otherwise -#[instrument(skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn is_hypervisor_present() -> bool { if let Ok(kvm) = Kvm::new() { let api_version = kvm.get_api_version(); @@ -73,361 +51,72 @@ pub(crate) fn is_hypervisor_present() -> bool { } } -#[cfg(gdb)] -mod debug { - use kvm_bindings::kvm_debug_exit_arch; - - use super::KVMDriver; - use crate::hypervisor::gdb::{ - DebugMemoryAccess, DebugMsg, DebugResponse, GuestDebug, KvmDebug, VcpuStopReason, - }; - use crate::{Result, new_error}; - - impl KVMDriver { - /// Resets the debug information to disable debugging - fn disable_debug(&mut self) -> Result<()> { - let mut debug = KvmDebug::default(); - - debug.set_single_step(&self.vcpu_fd, false)?; - - self.debug = Some(debug); - - Ok(()) - } - - /// Get the reason the vCPU has stopped - pub(crate) fn get_stop_reason( - &mut self, - debug_exit: kvm_debug_exit_arch, - ) -> Result { - let debug = self - .debug - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - debug.get_stop_reason(&self.vcpu_fd, debug_exit, self.entrypoint) - } - - pub(crate) fn process_dbg_request( - &mut self, - req: DebugMsg, - mem_access: &DebugMemoryAccess, - ) -> Result { - if let Some(debug) = self.debug.as_mut() { - match req { - DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( - debug - .add_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to add hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( - debug - .add_sw_breakpoint(&self.vcpu_fd, addr, mem_access) - .map_err(|e| { - log::error!("Failed to add sw breakpoint: {:?}", e); - - e - 
}) - .is_ok(), - )), - DebugMsg::Continue => { - debug.set_single_step(&self.vcpu_fd, false).map_err(|e| { - log::error!("Failed to continue execution: {:?}", e); - - e - })?; - - Ok(DebugResponse::Continue) - } - DebugMsg::DisableDebug => { - self.disable_debug().map_err(|e| { - log::error!("Failed to disable debugging: {:?}", e); - - e - })?; - - Ok(DebugResponse::DisableDebug) - } - DebugMsg::GetCodeSectionOffset => { - let offset = mem_access - .dbg_mem_access_fn - .try_lock() - .map_err(|e| { - new_error!("Error locking at {}:{}: {}", file!(), line!(), e) - })? - .layout - .get_guest_code_address(); - - Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) - } - DebugMsg::ReadAddr(addr, len) => { - let mut data = vec![0u8; len]; - - debug.read_addrs(&self.vcpu_fd, addr, &mut data, mem_access)?; - - Ok(DebugResponse::ReadAddr(data)) - } - DebugMsg::ReadRegisters => debug - .read_regs(&self.vcpu_fd) - .map_err(|e| { - log::error!("Failed to read registers: {:?}", e); - - e - }) - .map(|(regs, fpu)| DebugResponse::ReadRegisters(Box::new((regs, fpu)))), - DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( - debug - .remove_hw_breakpoint(&self.vcpu_fd, addr) - .map_err(|e| { - log::error!("Failed to remove hw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( - debug - .remove_sw_breakpoint(&self.vcpu_fd, addr, mem_access) - .map_err(|e| { - log::error!("Failed to remove sw breakpoint: {:?}", e); - - e - }) - .is_ok(), - )), - DebugMsg::Step => { - debug.set_single_step(&self.vcpu_fd, true).map_err(|e| { - log::error!("Failed to enable step instruction: {:?}", e); - - e - })?; - - Ok(DebugResponse::Step) - } - DebugMsg::WriteAddr(addr, data) => { - debug.write_addrs(&self.vcpu_fd, addr, &data, mem_access)?; - - Ok(DebugResponse::WriteAddr) - } - DebugMsg::WriteRegisters(boxed_regs) => { - let (regs, fpu) = boxed_regs.as_ref(); - debug - 
.write_regs(&self.vcpu_fd, regs, fpu) - .map_err(|e| { - log::error!("Failed to write registers: {:?}", e); - - e - }) - .map(|_| DebugResponse::WriteRegisters) - } - } - } else { - Err(new_error!("Debugging is not enabled")) - } - } - - pub(crate) fn recv_dbg_msg(&mut self) -> Result { - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn.recv().map_err(|e| { - new_error!( - "Got an error while waiting to receive a message from the gdb thread: {:?}", - e - ) - }) - } - - pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { - log::debug!("Sending {:?}", cmd); - - let gdb_conn = self - .gdb_conn - .as_mut() - .ok_or_else(|| new_error!("Debug is not enabled"))?; - - gdb_conn.send(cmd).map_err(|e| { - new_error!( - "Got an error while sending a response message to the gdb thread: {:?}", - e - ) - }) - } - } -} - -/// A Hypervisor driver for KVM on Linux -pub(crate) struct KVMDriver { - _kvm: Kvm, +/// A KVM implementation of a single-vcpu VM +#[derive(Debug)] +pub(crate) struct KvmVm { vm_fd: VmFd, - page_size: usize, vcpu_fd: VcpuFd, - entrypoint: u64, - orig_rsp: GuestPtr, - interrupt_handle: Arc, + // KVM as opposed to mshv/whp has no way to get current debug regs, so need to keep a copy here #[cfg(gdb)] - debug: Option, - #[cfg(gdb)] - gdb_conn: Option>, - #[cfg(crashdump)] - rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] - trace_info: MemTraceInfo, + debug_regs: kvm_guest_debug, } -impl KVMDriver { - /// Create a new instance of a `KVMDriver`, with only control registers - /// set. Standard registers will not be set, and `initialise` must - /// be called to do so. - #[allow(clippy::too_many_arguments)] - // TODO: refactor this function to take fewer arguments. 
Add trace_info to rt_cfg - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn new( - pml4_addr: u64, - entrypoint: u64, - rsp: u64, - config: &SandboxConfiguration, - #[cfg(gdb)] gdb_conn: Option>, - #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, - #[cfg(feature = "mem_profile")] trace_info: MemTraceInfo, - ) -> Result { - let kvm = Kvm::new()?; - - let vm_fd = kvm.create_vm_with_type(0)?; +static KVM: LazyLock> = + LazyLock::new(|| Kvm::new().map_err(|e| new_error!("Failed to open /dev/kvm: {}", e))); +impl KvmVm { + /// Create a new instance of a `KvmVm` + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn new() -> Result { + let hv = KVM + .as_ref() + .map_err(|e| new_error!("Failed to create KVM instance: {}", e))?; + let vm_fd = hv.create_vm_with_type(0)?; let vcpu_fd = vm_fd.create_vcpu(0)?; - #[cfg(gdb)] - let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn { - let mut debug = KvmDebug::new(); - // Add breakpoint to the entry point address - debug.add_hw_breakpoint(&vcpu_fd, entrypoint)?; - - (Some(debug), Some(gdb_conn)) - } else { - (None, None) - }; - - let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?; - - let interrupt_handle: Arc = Arc::new(LinuxInterruptHandle { - state: AtomicU8::new(0), - #[cfg(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - ))] - tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), - #[cfg(not(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - )))] - tid: AtomicU64::new(unsafe { libc::pthread_self() }), - retry_delay: config.get_interrupt_retry_delay(), - sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), - dropped: AtomicBool::new(false), - }); - - let mut kvm = Self { - _kvm: kvm, + Ok(Self { vm_fd, - page_size: 0, vcpu_fd, - entrypoint, - orig_rsp: rsp_gp, - interrupt_handle: interrupt_handle.clone(), 
- #[cfg(gdb)] - debug, #[cfg(gdb)] - gdb_conn, - #[cfg(crashdump)] - rt_cfg, - #[cfg(feature = "mem_profile")] - trace_info, - }; - - kvm.setup_initial_sregs(pml4_addr)?; - - // Send the interrupt handle to the GDB thread if debugging is enabled - // This is used to allow the GDB thread to stop the vCPU - #[cfg(gdb)] - if kvm.debug.is_some() { - kvm.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?; - } - - Ok(kvm) + debug_regs: kvm_guest_debug::default(), + }) } } -impl Debug for KVMDriver { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut f = f.debug_struct("KVM Driver"); - // Output each memory region - - let regs = self.vcpu_fd.get_regs(); - // check that regs is OK and then set field in debug struct +impl Hypervisor for KvmVm { + fn regs(&self) -> Result { + Ok((&self.vcpu_fd.get_regs()?).into()) + } - if let Ok(regs) = regs { - f.field("Registers", ®s); - } + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + Ok(self.vcpu_fd.set_regs(®s.into())?) + } - let sregs = self.vcpu_fd.get_sregs(); + fn sregs(&self) -> Result { + Ok((&self.vcpu_fd.get_sregs()?).into()) + } - // check that sregs is OK and then set field in debug struct + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + Ok(self.vcpu_fd.set_sregs(&sregs.into())?) + } - if let Ok(sregs) = sregs { - f.field("Special Registers", &sregs); - } + fn fpu(&self) -> Result { + Ok((&self.vcpu_fd.get_fpu()?).into()) + } - f.finish() + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + Ok(self.vcpu_fd.set_fpu(&fpu.into())?) } -} -impl Hypervisor for KVMDriver { - /// Implementation of initialise for Hypervisor trait. 
- #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn initialise( - &mut self, - peb_addr: RawPtr, - seed: u64, - page_size: u32, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - max_guest_log_level: Option, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()> { - self.page_size = page_size as usize; - - let max_guest_log_level: u64 = match max_guest_log_level { - Some(level) => level as u64, - None => self.get_max_log_level().into(), - }; - - let regs = CommonRegisters { - rip: self.entrypoint, - rsp: self.orig_rsp.absolute()?, - - // function args - rdi: peb_addr.into(), - rsi: seed, - rdx: page_size.into(), - rcx: max_guest_log_level, - - ..Default::default() - }; - self.set_regs(®s)?; - Ok(()) + #[cfg(crashdump)] + fn xsave(&self) -> Result> { + let xsave = self.vcpu_fd.get_xsave()?; + Ok(xsave + .region + .into_iter() + .flat_map(u32::to_le_bytes) + .collect()) } unsafe fn map_memory(&mut self, (slot, region): (u32, &MemoryRegion)) -> Result<()> { @@ -448,28 +137,6 @@ impl Hypervisor for KVMDriver { Ok(()) } - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()> { - // Reset general purpose registers, then set RIP and RSP - let regs = CommonRegisters { - rip: dispatch_func_addr.into(), - rsp: self.orig_rsp.absolute()?, - ..Default::default() - }; - self.set_regs(®s)?; - - // reset fpu state - self.set_fpu(&CommonFpu::default())?; - - Ok(()) - } - fn run_vcpu(&mut self) -> Result { match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => Ok(HyperlightExit::Halt()), @@ -496,262 +163,95 @@ impl Hypervisor for KVMDriver { ))), } } +} - fn regs(&self) -> Result { - let kvm_regs = self.vcpu_fd.get_regs()?; - Ok((&kvm_regs).into()) - } - - fn set_regs(&mut self, regs: &super::regs::CommonRegisters) -> Result<()> { - 
let kvm_regs: kvm_regs = regs.into(); - self.vcpu_fd.set_regs(&kvm_regs)?; - Ok(()) - } - - fn fpu(&self) -> Result { - let kvm_fpu = self.vcpu_fd.get_fpu()?; - Ok((&kvm_fpu).into()) - } +#[cfg(gdb)] +impl DebuggableVm for KvmVm { + fn translate_gva(&self, gva: u64) -> Result { + use crate::HyperlightError; - fn set_fpu(&mut self, fpu: &super::regs::CommonFpu) -> Result<()> { - let kvm_fpu: kvm_fpu = fpu.into(); - self.vcpu_fd.set_fpu(&kvm_fpu)?; - Ok(()) + let gpa = self.vcpu_fd.translate_gva(gva)?; + if gpa.valid == 0 { + Err(HyperlightError::TranslateGuestAddress(gva)) + } else { + Ok(gpa.physical_address) + } } - fn sregs(&self) -> Result { - let kvm_sregs = self.vcpu_fd.get_sregs()?; - Ok((&kvm_sregs).into()) - } + fn set_debug(&mut self, enable: bool) -> Result<()> { + use kvm_bindings::{KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_HW_BP, KVM_GUESTDBG_USE_SW_BP}; - fn set_sregs(&mut self, sregs: &super::regs::CommonSpecialRegisters) -> Result<()> { - let kvm_sregs: kvm_sregs = sregs.into(); - self.vcpu_fd.set_sregs(&kvm_sregs)?; + log::info!("Setting debug to {}", enable); + if enable { + self.debug_regs.control |= + KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_USE_SW_BP; + } else { + self.debug_regs.control &= + !(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_USE_SW_BP); + } + self.vcpu_fd.set_guest_debug(&self.debug_regs)?; Ok(()) } - fn interrupt_handle(&self) -> Arc { - self.interrupt_handle.clone() - } + fn set_single_step(&mut self, enable: bool) -> Result<()> { + use kvm_bindings::KVM_GUESTDBG_SINGLESTEP; - fn clear_cancel(&self) { - self.interrupt_handle.clear_cancel(); - } + log::info!("Setting single step to {}", enable); + if enable { + self.debug_regs.control |= KVM_GUESTDBG_SINGLESTEP; + } else { + self.debug_regs.control &= !KVM_GUESTDBG_SINGLESTEP; + } + self.vcpu_fd.set_guest_debug(&self.debug_regs)?; - #[cfg(crashdump)] - fn crashdump_context(&self) -> Result> { - if self.rt_cfg.guest_core_dump { - let mut regs = [0; 
27]; - - let vcpu_regs = self.vcpu_fd.get_regs()?; - let sregs = self.vcpu_fd.get_sregs()?; - let xsave = self.vcpu_fd.get_xsave()?; - - // Set the registers in the order expected by the crashdump context - regs[0] = vcpu_regs.r15; // r15 - regs[1] = vcpu_regs.r14; // r14 - regs[2] = vcpu_regs.r13; // r13 - regs[3] = vcpu_regs.r12; // r12 - regs[4] = vcpu_regs.rbp; // rbp - regs[5] = vcpu_regs.rbx; // rbx - regs[6] = vcpu_regs.r11; // r11 - regs[7] = vcpu_regs.r10; // r10 - regs[8] = vcpu_regs.r9; // r9 - regs[9] = vcpu_regs.r8; // r8 - regs[10] = vcpu_regs.rax; // rax - regs[11] = vcpu_regs.rcx; // rcx - regs[12] = vcpu_regs.rdx; // rdx - regs[13] = vcpu_regs.rsi; // rsi - regs[14] = vcpu_regs.rdi; // rdi - regs[15] = 0; // orig rax - regs[16] = vcpu_regs.rip; // rip - regs[17] = sregs.cs.selector as u64; // cs - regs[18] = vcpu_regs.rflags; // eflags - regs[19] = vcpu_regs.rsp; // rsp - regs[20] = sregs.ss.selector as u64; // ss - regs[21] = sregs.fs.base; // fs_base - regs[22] = sregs.gs.base; // gs_base - regs[23] = sregs.ds.selector as u64; // ds - regs[24] = sregs.es.selector as u64; // es - regs[25] = sregs.fs.selector as u64; // fs - regs[26] = sregs.gs.selector as u64; // gs - - // Get the filename from the runtime config - let filename = self.rt_cfg.binary_path.clone().and_then(|path| { - Path::new(&path) - .file_name() - .and_then(|name| name.to_os_string().into_string().ok()) - }); - - // The [`CrashDumpContext`] accepts xsave as a vector of u8, so we need to convert the - // xsave region to a vector of u8 - // Also include mapped regions in addition to the initial sandbox regions - let mut regions: Vec = self.sandbox_regions.clone(); - regions.extend(self.mmap_regions.iter().map(|(r, _)| r.clone())); - Ok(Some(crashdump::CrashDumpContext::new( - regions, - regs, - xsave - .region - .iter() - .flat_map(|item| item.to_le_bytes()) - .collect::>(), - self.entrypoint, - self.rt_cfg.binary_path.clone(), - filename, - ))) + // Set TF Flag to enable Traps + 
+ let mut regs = self.regs()?; + if enable { + regs.rflags |= 1 << 8; } else { - Ok(None) + regs.rflags &= !(1 << 8); } + self.set_regs(&regs)?; + Ok(()) } - #[cfg(gdb)] - fn handle_debug( - &mut self, - dbg_mem_access_fn: Arc>>, - stop_reason: VcpuStopReason, - ) -> Result<()> { - if self.debug.is_none() { - return Err(new_error!("Debugging is not enabled")); - } + fn add_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + use crate::hypervisor::gdb::arch::MAX_NO_OF_HW_BP; - let mem_access = DebugMemoryAccess { - dbg_mem_access_fn, - guest_mmap_regions: self.mmap_regions.iter().map(|(r, _)| r.clone()).collect(), - }; - - match stop_reason { - // If the vCPU stopped because of a crash, we need to handle it differently - // We do not want to allow resuming execution or placing breakpoints - // because the guest has crashed. - // We only allow reading registers and memory - VcpuStopReason::Crash => { - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| { - new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) - })?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - // Flag to store if we should deny continue or step requests - let mut deny_continue = false; - // Flag to store if we should detach from the gdb session - let mut detach = false; - - let response = match req { - // Allow the detach request to disable debugging by continuing resuming - // hypervisor crash error reporting - DebugMsg::DisableDebug => { - detach = true; - DebugResponse::DisableDebug - } - // Do not allow continue or step requests - DebugMsg::Continue | DebugMsg::Step => { - deny_continue = true; - DebugResponse::NotAllowed - } - // Do not allow adding/removing breakpoints and writing to memory or registers - DebugMsg::AddHwBreakpoint(_) - | DebugMsg::AddSwBreakpoint(_) - | DebugMsg::RemoveHwBreakpoint(_) - | DebugMsg::RemoveSwBreakpoint(_) - | DebugMsg::WriteAddr(_, _) - 
| DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, - - // For all other requests, we will process them normally - _ => { - let result = self.process_dbg_request(req, &mem_access); - match result { - Ok(response) => response, - Err(HyperlightError::TranslateGuestAddress(_)) => { - // Treat non fatal errors separately so the guest doesn't fail - DebugResponse::ErrorOccurred - } - Err(e) => { - log::error!("Error processing debug request: {:?}", e); - return Err(e); - } - } - } - }; - - // Send the response to the request back to gdb - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - // If we are denying continue or step requests, the debugger assumes the - // execution started so we need to report a stop reason as a crash and let - // it request to read registers/memory to figure out what happened - if deny_continue { - self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash)) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - } - - // If we are detaching, we will break the loop and the Hypervisor will continue - // to handle the Crash reason - if detach { - break; - } - } - } - // If the vCPU stopped because of any other reason except a crash, we can handle it - // normally - _ => { - // Send the stop reason to the gdb thread - self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) - .map_err(|e| { - new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) - })?; - - loop { - log::debug!("Debug wait for event to resume vCPU"); - // Wait for a message from gdb - let req = self.recv_dbg_msg()?; - - let result = self.process_dbg_request(req, &mem_access); - - let response = match result { - Ok(response) => response, - // Treat non fatal errors separately so the guest doesn't fail - Err(HyperlightError::TranslateGuestAddress(_)) => { - DebugResponse::ErrorOccurred - } - Err(e) => { - return Err(e); - } - }; - - let cont = matches!( - response, - 
DebugResponse::Continue | DebugResponse::Step | DebugResponse::DisableDebug - ); - - self.send_dbg_msg(response) - .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; - - // Check if we should continue execution - // We continue if the response is one of the following: Step, Continue, or DisableDebug - if cont { - break; - } - } - } + // Check if breakpoint already exists + if self.debug_regs.arch.debugreg[..4].contains(&addr) { + return Ok(()); } + // Find the first available LOCAL (L0–L3) slot + let i = (0..MAX_NO_OF_HW_BP) + .position(|i| self.debug_regs.arch.debugreg[7] & (1 << (i * 2)) == 0) + .ok_or_else(|| new_error!("Tried to add more than 4 hardware breakpoints"))?; + + // Assign to corresponding debug register + self.debug_regs.arch.debugreg[i] = addr; + + // Enable LOCAL bit + self.debug_regs.arch.debugreg[7] |= 1 << (i * 2); + + self.vcpu_fd.set_guest_debug(&self.debug_regs)?; Ok(()) } - #[cfg(feature = "mem_profile")] - fn trace_info_mut(&mut self) -> &mut MemTraceInfo { - &mut self.trace_info - } -} + fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()> { + // Find the index of the breakpoint + let index = self.debug_regs.arch.debugreg[..4] + .iter() + .position(|&a| a == addr) + .ok_or_else(|| new_error!("Tried to remove non-existing hw-breakpoint"))?; + + // Clear the address + self.debug_regs.arch.debugreg[index] = 0; -impl Drop for KVMDriver { - fn drop(&mut self) { - self.interrupt_handle.set_dropped(); + // Disable LOCAL bit + self.debug_regs.arch.debugreg[7] &= !(1 << (index * 2)); + + self.vcpu_fd.set_guest_debug(&self.debug_regs)?; + Ok(()) } } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index cc80610d0..45f56ffd5 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -18,15 +18,12 @@ use log::LevelFilter; use crate::Result; use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; -use 
crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; -#[cfg(feature = "mem_profile")] -use crate::sandbox::trace::MemTraceInfo; +use crate::mem::memory_region::MemoryRegion; /// HyperV-on-linux functionality #[cfg(mshv3)] -pub mod hyperv_linux; +pub(crate) mod hyperv_linux; #[cfg(target_os = "windows")] -/// Hyperv-on-windows functionality pub(crate) mod hyperv_windows; /// GDB debugging support @@ -38,16 +35,14 @@ pub(crate) mod regs; #[cfg(kvm)] /// Functionality to manipulate KVM-based virtual machines -pub mod kvm; +pub(crate) mod kvm; + #[cfg(target_os = "windows")] /// Hyperlight Surrogate Process pub(crate) mod surrogate_process; #[cfg(target_os = "windows")] /// Hyperlight Surrogate Process pub(crate) mod surrogate_process_manager; -/// WindowsHypervisorPlatform utilities -#[cfg(target_os = "windows")] -pub(crate) mod windows_hypervisor_platform; /// Safe wrappers around windows types like `PSTR` #[cfg(target_os = "windows")] pub(crate) mod wrappers; @@ -60,23 +55,14 @@ pub(crate) mod hyperlight_vm; use std::fmt::Debug; use std::str::FromStr; #[cfg(any(kvm, mshv3))] -use std::sync::atomic::AtomicU64; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; +#[cfg(target_os = "windows")] use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; -use std::sync::{Arc, Mutex}; #[cfg(any(kvm, mshv3))] use std::time::Duration; -#[cfg(gdb)] -use gdb::VcpuStopReason; - -use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; -use crate::mem::shared_mem::HostSharedMemory; -use crate::sandbox::host_funcs::FunctionRegistry; - -/// These are the generic exit reasons that we can handle from a Hypervisor the Hypervisors run method is responsible for mapping from -/// the hypervisor specific exit reasons to these generic ones -pub enum HyperlightExit { +pub(crate) enum HyperlightExit { + /// The vCPU has exited due to a debug event (usually breakpoint) #[cfg(gdb)] Debug { dr6: u64, exception: u32 }, /// The vCPU has 
halted @@ -102,22 +88,9 @@ pub enum HyperlightExit { Retry(), } -/// A common set of hypervisor functionality -pub(crate) trait Hypervisor: Debug + Send { - /// Initialise the internally stored vCPU with the given PEB address and - /// random number seed, then run it until a HLT instruction. - #[allow(clippy::too_many_arguments)] - fn initialise( - &mut self, - peb_addr: RawPtr, - seed: u64, - page_size: u32, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - guest_max_log_level: Option, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()>; - +/// Trait for single-vCPU VMs. Provides a common interface for basic VM operations. +/// Abstracts over differences between KVM, MSHV and WHP implementations. +pub(crate) trait Hypervisor: Send + Sync + Debug { /// Map memory region into this VM /// /// # Safety @@ -130,144 +103,75 @@ pub(crate) trait Hypervisor: Debug + Send { /// Unmap memory region from this VM that has previously been mapped using `map_memory`. fn unmap_memory(&mut self, region: (u32, &MemoryRegion)) -> Result<()>; - /// Dispatch a call from the host to the guest using the given pointer - /// to the dispatch function _in the guest's address space_. - /// - /// Do this by setting the instruction pointer to `dispatch_func_addr` - /// and then running the execution loop until a halt instruction. - /// - /// Returns `Ok` if the call succeeded, and an `Err` if it failed - fn dispatch_call_from_host( - &mut self, - dispatch_func_addr: RawPtr, - mem_mgr: &mut SandboxMemoryManager, - host_funcs: &Arc>, - #[cfg(gdb)] dbg_mem_access_fn: Arc>>, - ) -> Result<()>; - /// Runs the vCPU until it exits. 
/// Note: this function should not emit any traces or spans as it is called after guest span is setup fn run_vcpu(&mut self) -> Result; - /// Get InterruptHandle to underlying VM (returns internal trait) - fn interrupt_handle(&self) -> Arc; - - /// Clear the cancellation flag - fn clear_cancel(&self); - /// Get regs #[allow(dead_code)] fn regs(&self) -> Result; /// Set regs - #[allow(dead_code)] - fn set_regs(&mut self, regs: &CommonRegisters) -> Result<()>; + fn set_regs(&self, regs: &CommonRegisters) -> Result<()>; /// Get fpu regs #[allow(dead_code)] fn fpu(&self) -> Result; /// Set fpu regs - #[allow(dead_code)] - fn set_fpu(&mut self, fpu: &CommonFpu) -> Result<()>; + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()>; /// Get special regs #[allow(dead_code)] fn sregs(&self) -> Result; /// Set special regs - #[allow(dead_code)] - fn set_sregs(&mut self, sregs: &CommonSpecialRegisters) -> Result<()>; - - /// Setup initial special registers for the hypervisor - /// This is a default implementation that works for all hypervisors - fn setup_initial_sregs(&mut self, _pml4_addr: u64) -> Result<()> { - #[cfg(feature = "init-paging")] - let sregs = CommonSpecialRegisters::standard_64bit_defaults(_pml4_addr); - - #[cfg(not(feature = "init-paging"))] - let sregs = CommonSpecialRegisters::standard_real_mode_defaults(); - - self.set_sregs(&sregs)?; - Ok(()) - } - - /// Get the logging level to pass to the guest entrypoint - fn get_max_log_level(&self) -> u32 { - // Check to see if the RUST_LOG environment variable is set - // and if so, parse it to get the log_level for hyperlight_guest - // if that is not set get the log level for the hyperlight_host - - // This is done as the guest will produce logs based on the log level returned here - // producing those logs is expensive and we don't want to do it if the host is not - // going to process them - - let val = std::env::var("RUST_LOG").unwrap_or_default(); - - let level = if val.contains("hyperlight_guest") { - 
val.split(',') - .find(|s| s.contains("hyperlight_guest")) - .unwrap_or("") - .split('=') - .nth(1) - .unwrap_or("") - } else if val.contains("hyperlight_host") { - val.split(',') - .find(|s| s.contains("hyperlight_host")) - .unwrap_or("") - .split('=') - .nth(1) - .unwrap_or("") - } else { - // look for a value string that does not contain "=" - val.split(',').find(|s| !s.contains("=")).unwrap_or("") - }; - - log::info!("Determined guest log level: {}", level); - // Convert the log level string to a LevelFilter - // If no value is found, default to Error - LevelFilter::from_str(level).unwrap_or(LevelFilter::Error) as u32 - } + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()>; + /// xsave #[cfg(crashdump)] - fn crashdump_context(&self) -> Result>; + fn xsave(&self) -> Result>; - #[cfg(gdb)] - /// handles the cases when the vCPU stops due to a Debug event - fn handle_debug( - &mut self, - _dbg_mem_access_fn: Arc>>, - _stop_reason: VcpuStopReason, - ) -> Result<()> { - unimplemented!() - } + /// Get partition handle + #[cfg(target_os = "windows")] + fn partition_handle(&self) -> windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE; - /// Get a mutable reference of the trace info for the guest - #[cfg(feature = "mem_profile")] - fn trace_info_mut(&mut self) -> &mut MemTraceInfo; + /// Mark that initial memory setup is complete. After this, map_memory will fail. + /// This is only needed on Windows where dynamic memory mapping is not yet supported. + #[cfg(target_os = "windows")] + fn complete_initial_memory_setup(&mut self); } -/// The vCPU tried to access the given addr -enum MemoryAccess { - /// The accessed region has the given flags - AccessViolation(MemoryRegionFlags), - /// The accessed region is a stack guard page - StackGuardPageViolation, -} - -/// Determines if a known memory access violation occurred at the given address with the given action type. 
-/// Returns Some(reason) if violation reason could be determined, or None if violation occurred but in unmapped region. -fn get_memory_access_violation<'a>( - gpa: usize, - tried: MemoryRegionFlags, - mut mem_regions: impl Iterator, -) -> Option { - // find the region containing the given gpa - let region = mem_regions.find(|region| region.guest_region.contains(&gpa)); - - if let Some(region) = region { - if region.region_type == MemoryRegionType::GuardPage { - return Some(MemoryAccess::StackGuardPageViolation); - } else if !region.flags.contains(tried) { - return Some(MemoryAccess::AccessViolation(region.flags)); - } - } - None +/// Get the logging level to pass to the guest entrypoint +fn get_max_log_level() -> u32 { + // Check to see if the RUST_LOG environment variable is set + // and if so, parse it to get the log_level for hyperlight_guest + // if that is not set get the log level for the hyperlight_host + + // This is done as the guest will produce logs based on the log level returned here + // producing those logs is expensive and we don't want to do it if the host is not + // going to process them + + let val = std::env::var("RUST_LOG").unwrap_or_default(); + + let level = if val.contains("hyperlight_guest") { + val.split(',') + .find(|s| s.contains("hyperlight_guest")) + .unwrap_or("") + .split('=') + .nth(1) + .unwrap_or("") + } else if val.contains("hyperlight_host") { + val.split(',') + .find(|s| s.contains("hyperlight_host")) + .unwrap_or("") + .split('=') + .nth(1) + .unwrap_or("") + } else { + // look for a value string that does not contain "=" + val.split(',').find(|s| !s.contains("=")).unwrap_or("") + }; + + log::info!("Determined guest log level: {}", level); + // Convert the log level string to a LevelFilter + // If no value is found, default to Error + LevelFilter::from_str(level).unwrap_or(LevelFilter::Error) as u32 } /// A trait for platform-specific interrupt handle implementation details diff --git 
a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs b/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs deleted file mode 100644 index 4f4e82555..000000000 --- a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs +++ /dev/null @@ -1,586 +0,0 @@ -/* -Copyright 2025 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -use core::ffi::c_void; - -use hyperlight_common::mem::PAGE_SIZE_USIZE; -use tracing::{Span, instrument}; -use windows::Win32::Foundation::{FreeLibrary, HANDLE}; -use windows::Win32::System::Hypervisor::*; -use windows::Win32::System::LibraryLoader::*; -use windows::core::s; -use windows_result::HRESULT; - -use super::regs::{ - Align16, CommonFpu, CommonRegisters, CommonSpecialRegisters, WHP_FPU_NAMES_LEN, WHP_REGS_NAMES, - WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, WHP_SREGS_NAMES_LEN, -}; -use super::surrogate_process::SurrogateProcess; -#[cfg(crashdump)] -use crate::HyperlightError; -use crate::hypervisor::regs::WHP_FPU_NAMES; -#[cfg(gdb)] -use crate::hypervisor::wrappers::WHvDebugRegisters; -use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -use crate::{Result, new_error}; - -/// Interop calls for Windows Hypervisor Platform APIs -/// -/// Documentation can be found at: -/// - https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/hypervisor-platform -/// - https://microsoft.github.io/windows-docs-rs/doc/windows/Win32/System/Hypervisor/index.html -#[instrument(skip_all, 
parent = Span::current(), level= "Trace")] -pub(crate) fn is_hypervisor_present() -> bool { - let mut capability: WHV_CAPABILITY = Default::default(); - let written_size: Option<*mut u32> = None; - - match unsafe { - WHvGetCapability( - WHvCapabilityCodeHypervisorPresent, - &mut capability as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - written_size, - ) - } { - Ok(_) => unsafe { capability.HypervisorPresent.as_bool() }, - Err(_) => { - log::info!("Windows Hypervisor Platform is not available on this system"); - false - } - } -} - -#[derive(Debug)] -pub(super) struct VMPartition(WHV_PARTITION_HANDLE); - -impl VMPartition { - /// This is the position of the extended vm exit in partition property - #[cfg(gdb)] - const EXTENDED_VM_EXIT_POS: u32 = 2; - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn new(proc_count: u32) -> Result { - let hdl = unsafe { WHvCreatePartition() }?; - Self::set_processor_count(&hdl, proc_count)?; - #[cfg(gdb)] - Self::set_extended_vm_exits(&hdl)?; - unsafe { WHvSetupPartition(hdl) }?; - Ok(Self(hdl)) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - fn set_processor_count( - partition_handle: &WHV_PARTITION_HANDLE, - processor_count: u32, - ) -> Result<()> { - unsafe { - WHvSetPartitionProperty( - *partition_handle, - WHvPartitionPropertyCodeProcessorCount, - &processor_count as *const u32 as *const c_void, - std::mem::size_of_val(&processor_count) as u32, - )?; - } - - Ok(()) - } - - /// Sets up the debugging exception interception for the partition - /// This is needed for a HyperV partition to be able to intercept debug traps and breakpoints - /// Steps: - /// - set the extended VM exits property to enable extended VM exits - /// - set the exception exit bitmap to include debug trap and breakpoint trap - #[cfg(gdb)] - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub fn set_extended_vm_exits(partition_handle: 
&WHV_PARTITION_HANDLE) -> Result<()> { - let mut property: WHV_PARTITION_PROPERTY = Default::default(); - - // Set the extended VM exits property - property.ExtendedVmExits.AsUINT64 = 1 << Self::EXTENDED_VM_EXIT_POS; - Self::set_property( - partition_handle, - WHvPartitionPropertyCodeExtendedVmExits, - &property, - )?; - - // Set the exception exit bitmap to include debug trap and breakpoint trap - property = Default::default(); - property.ExceptionExitBitmap = (1 << WHvX64ExceptionTypeDebugTrapOrFault.0) - | (1 << WHvX64ExceptionTypeBreakpointTrap.0); - Self::set_property( - partition_handle, - WHvPartitionPropertyCodeExceptionExitBitmap, - &property, - )?; - - Ok(()) - } - - /// Helper function to set partition properties - #[cfg(gdb)] - fn set_property( - partition_handle: &WHV_PARTITION_HANDLE, - property_code: WHV_PARTITION_PROPERTY_CODE, - property: &WHV_PARTITION_PROPERTY, - ) -> Result<()> { - unsafe { - WHvSetPartitionProperty( - *partition_handle, - property_code, - property as *const _ as *const c_void, - std::mem::size_of::() as u32, - )?; - } - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn map_gpa_range( - &mut self, - regions: &[MemoryRegion], - surrogate_process: &SurrogateProcess, - ) -> Result<()> { - let process_handle: HANDLE = surrogate_process.process_handle.into(); - // this is the address in the surrogate process where shared memory starts. - // We add page-size because we don't care about the first guard page - let surrogate_address = surrogate_process.allocated_address as usize + PAGE_SIZE_USIZE; - if regions.is_empty() { - return Err(new_error!("No memory regions to map")); - } - // this is the address in the main process where the shared memory starts - let host_address = regions[0].host_region.start; - - // offset between the surrogate process and the host process address of start of shared memory - let offset = isize::try_from(surrogate_address)? 
- isize::try_from(host_address)?; - - // The function pointer to WHvMapGpaRange2 is resolved dynamically to allow us to detect - // when we are running on older versions of windows that do not support this API and - // return a more informative error message, rather than failing with an error about a missing entrypoint - let whvmapgparange2_func = unsafe { - match try_load_whv_map_gpa_range2() { - Ok(func) => func, - Err(e) => return Err(new_error!("Can't find API: {}", e)), - } - }; - - regions.iter().try_for_each(|region| unsafe { - let flags = region - .flags - .iter() - .map(|flag| match flag { - MemoryRegionFlags::NONE => Ok(WHvMapGpaRangeFlagNone), - MemoryRegionFlags::READ => Ok(WHvMapGpaRangeFlagRead), - MemoryRegionFlags::WRITE => Ok(WHvMapGpaRangeFlagWrite), - MemoryRegionFlags::EXECUTE => Ok(WHvMapGpaRangeFlagExecute), - MemoryRegionFlags::STACK_GUARD => Ok(WHvMapGpaRangeFlagNone), - _ => Err(new_error!("Invalid Memory Region Flag")), - }) - .collect::>>()? - .iter() - .fold(WHvMapGpaRangeFlagNone, |acc, flag| acc | *flag); // collect using bitwise OR - - let res = whvmapgparange2_func( - self.0, - process_handle, - (isize::try_from(region.host_region.start)? 
+ offset) as *const c_void, - region.guest_region.start as u64, - region.guest_region.len() as u64, - flags, - ); - if res.is_err() { - return Err(new_error!("Call to WHvMapGpaRange2 failed")); - } - Ok(()) - })?; - Ok(()) - } -} - -// This function dynamically loads the WHvMapGpaRange2 function from the winhvplatform.dll -// WHvMapGpaRange2 only available on Windows 11 or Windows Server 2022 and later -// we do things this way to allow a user trying to load hyperlight on an older version of windows to -// get an error message saying that hyperlight requires a newer version of windows, rather than just failing -// with an error about a missing entrypoint -// This function should always succeed since before we get here we have already checked that the hypervisor is present and -// that we are on a supported version of windows. -type WHvMapGpaRange2Func = unsafe extern "system" fn( - WHV_PARTITION_HANDLE, - HANDLE, - *const c_void, - u64, - u64, - WHV_MAP_GPA_RANGE_FLAGS, -) -> HRESULT; - -pub unsafe fn try_load_whv_map_gpa_range2() -> Result { - let library = unsafe { - LoadLibraryExA( - s!("winhvplatform.dll"), - None, - LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, - ) - }; - - if let Err(e) = library { - return Err(new_error!("{}", e)); - } - - #[allow(clippy::unwrap_used)] - // We know this will succeed because we just checked for an error above - let library = library.unwrap(); - - let address = unsafe { GetProcAddress(library, s!("WHvMapGpaRange2")) }; - - if address.is_none() { - unsafe { FreeLibrary(library)? 
}; - return Err(new_error!( - "Failed to find WHvMapGpaRange2 in winhvplatform.dll" - )); - } - - unsafe { Ok(std::mem::transmute_copy(&address)) } -} - -impl Drop for VMPartition { - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn drop(&mut self) { - if let Err(e) = unsafe { WHvDeletePartition(self.0) } { - tracing::error!( - "Failed to delete partition (WHvDeletePartition failed): {:?}", - e - ); - } - } -} - -#[derive(Debug)] -pub(crate) struct VMProcessor(VMPartition); -impl VMProcessor { - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn new(part: VMPartition) -> Result { - unsafe { WHvCreateVirtualProcessor(part.0, 0, 0) }?; - Ok(Self(part)) - } - - /// This function is used to translate a guest virtual address to a guest physical address - #[cfg(gdb)] - pub(super) fn translate_gva(&self, gva: u64) -> Result { - let partition_handle = self.get_partition_hdl(); - let mut gpa = 0; - let mut result = WHV_TRANSLATE_GVA_RESULT::default(); - - // Only validate read access because the write access is handled through the - // host memory mapping - let translateflags = WHvTranslateGvaFlagValidateRead; - - unsafe { - WHvTranslateGva( - partition_handle, - 0, - gva, - translateflags, - &mut result, - &mut gpa, - )?; - } - - Ok(gpa) - } - - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_partition_hdl(&self) -> WHV_PARTITION_HANDLE { - let part = &self.0; - part.0 - } - - /// Helper for setting arbitrary registers. 
- #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn set_registers( - &self, - registers: &[(WHV_REGISTER_NAME, Align16)], - ) -> Result<()> { - let register_count = registers.len(); - - let mut register_names = Vec::with_capacity(register_count); - let mut register_values = Vec::with_capacity(register_count); - - for (key, value) in registers.iter() { - register_names.push(*key); - register_values.push(*value); - } - - unsafe { - WHvSetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - register_names.as_ptr(), - register_count as u32, - register_values.as_ptr() as *const WHV_REGISTER_VALUE, - )?; - } - - Ok(()) - } - - pub(super) fn regs(&self) -> Result { - let mut whv_regs_values: [Align16; WHP_REGS_NAMES_LEN] = - unsafe { std::mem::zeroed() }; - - unsafe { - WHvGetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - WHP_REGS_NAMES.as_ptr(), - whv_regs_values.len() as u32, - whv_regs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - WHP_REGS_NAMES - .into_iter() - .zip(whv_regs_values) - .collect::)>>() - .as_slice() - .try_into() - .map_err(|e| { - new_error!( - "Failed to convert WHP registers to CommonRegisters: {:?}", - e - ) - }) - } - - pub(super) fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { - let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_REGS_NAMES_LEN] = - regs.into(); - self.set_registers(&whp_regs)?; - Ok(()) - } - - pub(super) fn sregs(&self) -> Result { - let mut whp_sregs_values: [Align16; WHP_SREGS_NAMES_LEN] = - unsafe { std::mem::zeroed() }; - - unsafe { - WHvGetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - WHP_SREGS_NAMES.as_ptr(), - whp_sregs_values.len() as u32, - whp_sregs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - WHP_SREGS_NAMES - .into_iter() - .zip(whp_sregs_values) - .collect::)>>() - .as_slice() - .try_into() - .map_err(|e| { - new_error!( - "Failed to convert WHP registers to CommonSpecialRegisters: {:?}", - e 
- ) - }) - } - - pub(super) fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { - let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = - sregs.into(); - self.set_registers(&whp_regs)?; - Ok(()) - } - - pub(super) fn fpu(&self) -> Result { - let mut whp_fpu_values: [Align16; WHP_FPU_NAMES_LEN] = - unsafe { std::mem::zeroed() }; - - unsafe { - WHvGetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - WHP_FPU_NAMES.as_ptr(), - whp_fpu_values.len() as u32, - whp_fpu_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - WHP_FPU_NAMES - .into_iter() - .zip(whp_fpu_values) - .collect::)>>() - .as_slice() - .try_into() - .map_err(|e| new_error!("Failed to convert WHP registers to CommonFpu: {:?}", e)) - } - - pub(super) fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { - let whp_fpu: [(WHV_REGISTER_NAME, Align16); WHP_FPU_NAMES_LEN] = - fpu.into(); - self.set_registers(&whp_fpu)?; - Ok(()) - } - - #[cfg(crashdump)] - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_xsave(&self) -> Result> { - // Get the required buffer size by calling with NULL buffer - let mut buffer_size_needed: u32 = 0; - - unsafe { - // First call with NULL buffer to get required size - // If the buffer is not large enough, the return value is WHV_E_INSUFFICIENT_BUFFER. - // In this case, BytesWritten receives the required buffer size. 
- let result = WHvGetVirtualProcessorXsaveState( - self.get_partition_hdl(), - 0, - std::ptr::null_mut(), - 0, - &mut buffer_size_needed, - ); - - // If it failed for reasons other than insufficient buffer, return error - if let Err(e) = result - && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER - { - return Err(HyperlightError::WindowsAPIError(e)); - } - } - - // Create a buffer with the appropriate size - let mut xsave_buffer = vec![0; buffer_size_needed as usize]; - - // Get the Xsave state - let mut written_bytes = 0; - unsafe { - WHvGetVirtualProcessorXsaveState( - self.get_partition_hdl(), - 0, - xsave_buffer.as_mut_ptr() as *mut std::ffi::c_void, - buffer_size_needed, - &mut written_bytes, - ) - }?; - - // Check if the number of written bytes matches the expected size - if written_bytes != buffer_size_needed { - return Err(new_error!( - "Failed to get Xsave state: expected {} bytes, got {}", - buffer_size_needed, - written_bytes - )); - } - - Ok(xsave_buffer) - } - - #[cfg(gdb)] - pub(super) fn set_debug_regs(&self, regs: &WHvDebugRegisters) -> Result<()> { - let registers = vec![ - ( - WHvX64RegisterDr0, - Align16(WHV_REGISTER_VALUE { Reg64: regs.dr0 }), - ), - ( - WHvX64RegisterDr1, - Align16(WHV_REGISTER_VALUE { Reg64: regs.dr1 }), - ), - ( - WHvX64RegisterDr2, - Align16(WHV_REGISTER_VALUE { Reg64: regs.dr2 }), - ), - ( - WHvX64RegisterDr3, - Align16(WHV_REGISTER_VALUE { Reg64: regs.dr3 }), - ), - ( - WHvX64RegisterDr6, - Align16(WHV_REGISTER_VALUE { Reg64: regs.dr6 }), - ), - ( - WHvX64RegisterDr7, - Align16(WHV_REGISTER_VALUE { Reg64: regs.dr7 }), - ), - ]; - - self.set_registers(®isters) - } - - #[cfg(gdb)] - pub(super) fn get_debug_regs(&self) -> Result { - const LEN: usize = 6; - - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterDr0, - WHvX64RegisterDr1, - WHvX64RegisterDr2, - WHvX64RegisterDr3, - WHvX64RegisterDr6, - WHvX64RegisterDr7, - ]; - - let mut out: [Align16; LEN] = unsafe { std::mem::zeroed() }; - unsafe { - 
WHvGetVirtualProcessorRegisters( - self.get_partition_hdl(), - 0, - names.as_ptr(), - LEN as u32, - out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - Ok(WHvDebugRegisters { - dr0: out[0].0.Reg64, - dr1: out[1].0.Reg64, - dr2: out[2].0.Reg64, - dr3: out[3].0.Reg64, - dr6: out[4].0.Reg64, - dr7: out[5].0.Reg64, - }) - } - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn run(&mut self) -> Result { - let partition_handle = self.get_partition_hdl(); - let mut exit_context: WHV_RUN_VP_EXIT_CONTEXT = Default::default(); - - unsafe { - WHvRunVirtualProcessor( - partition_handle, - 0, - &mut exit_context as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - )?; - } - - Ok(exit_context) - } -} - -impl Drop for VMProcessor { - #[instrument(parent = Span::current(), level= "Trace")] - fn drop(&mut self) { - let part_hdl = self.get_partition_hdl(); - if let Err(e) = unsafe { WHvDeleteVirtualProcessor(part_hdl, 0) } { - tracing::error!( - "Failed to delete virtual processor (WHvDeleteVirtualProcessor failed): {:?}", - e - ); - } - } -} diff --git a/src/hyperlight_host/src/hypervisor/wrappers.rs b/src/hyperlight_host/src/hypervisor/wrappers.rs index 135ae2b82..0d116f812 100644 --- a/src/hyperlight_host/src/hypervisor/wrappers.rs +++ b/src/hyperlight_host/src/hypervisor/wrappers.rs @@ -56,18 +56,6 @@ impl From<&PSTRWrapper> for PSTR { } } -/// only used on widos for handling debug registers with the VMProcessor -#[cfg(gdb)] -#[derive(Debug, Default, Copy, Clone, PartialEq)] -pub(super) struct WHvDebugRegisters { - pub dr0: u64, - pub dr1: u64, - pub dr2: u64, - pub dr3: u64, - pub dr6: u64, - pub dr7: u64, -} - /// Wrapper for HANDLE, required since HANDLE is no longer Send. 
#[derive(Debug, Copy, Clone)] pub struct HandleWrapper(HANDLE); diff --git a/src/hyperlight_host/src/sandbox/hypervisor.rs b/src/hyperlight_host/src/sandbox/hypervisor.rs index 412f55000..c72b7efbf 100644 --- a/src/hyperlight_host/src/sandbox/hypervisor.rs +++ b/src/hyperlight_host/src/sandbox/hypervisor.rs @@ -51,9 +51,9 @@ pub fn get_available_hypervisor() -> &'static Option { None } } else if #[cfg(target_os = "windows")] { - use crate::sandbox::windows_hypervisor_platform; + use crate::hypervisor::hyperv_windows; - if windows_hypervisor_platform::is_hypervisor_present() { + if hyperv_windows::is_hypervisor_present() { Some(HypervisorType::Whp) } else { None diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 232c7ae9f..5aa608b37 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -37,8 +37,8 @@ use super::host_funcs::FunctionRegistry; use super::snapshot::Snapshot; use crate::HyperlightError::{self, SnapshotSandboxMismatch}; use crate::func::{ParameterTuple, SupportedReturnType}; +use crate::hypervisor::InterruptHandle; use crate::hypervisor::hyperlight_vm::HyperlightVm; -use crate::hypervisor::{Hypervisor, InterruptHandle}; #[cfg(unix)] use crate::mem::memory_region::MemoryRegionType; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; @@ -714,7 +714,7 @@ impl MultiUseSandbox { #[cfg(crashdump)] #[instrument(err(Debug), skip_all, parent = Span::current())] pub fn generate_crashdump(&self) -> Result<()> { - crate::hypervisor::crashdump::generate_crashdump(self.vm.vm.as_ref() as &dyn Hypervisor) + crate::hypervisor::crashdump::generate_crashdump(&self.vm) } /// Returns whether the sandbox is currently poisoned. 
@@ -1250,7 +1250,7 @@ mod tests { // Verify the region is the same let mut restored_regions = sbox.vm.get_mapped_regions(); - assert_eq!(*restored_regions.next().unwrap(), region); + assert_eq!(restored_regions.next().unwrap(), ®ion); assert!(restored_regions.next().is_none()); drop(restored_regions); diff --git a/src/hyperlight_host/src/sandbox/mod.rs b/src/hyperlight_host/src/sandbox/mod.rs index 93bae12a8..bc03ed91f 100644 --- a/src/hyperlight_host/src/sandbox/mod.rs +++ b/src/hyperlight_host/src/sandbox/mod.rs @@ -53,9 +53,6 @@ pub use uninitialized::GuestBinary; /// Re-export for `UninitializedSandbox` type pub use uninitialized::UninitializedSandbox; -#[cfg(target_os = "windows")] -use crate::hypervisor::windows_hypervisor_platform; - // In case its not obvious why there are separate is_supported_platform and is_hypervisor_present functions its because // Hyperlight is designed to be able to run on a host that doesn't have a hypervisor. // In that case, the sandbox will be in process, we plan on making this a dev only feature and fixing up Linux support diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 86611af05..98941d893 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -21,11 +21,8 @@ use rand::Rng; use tracing::{Span, instrument}; use super::SandboxConfiguration; -use super::hypervisor::{HypervisorType, get_available_hypervisor}; #[cfg(any(crashdump, gdb))] use super::uninitialized::SandboxRuntimeConfig; -use crate::HyperlightError::NoHypervisorFound; -use crate::hypervisor::Hypervisor; use crate::hypervisor::hyperlight_vm::HyperlightVm; use crate::mem::exe::LoadInfo; use crate::mem::layout::SandboxMemoryLayout; @@ -168,71 +165,33 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(feature = "mem_profile")] let trace_info = MemTraceInfo::new(_load_info)?; - let vm: Box = match 
*get_available_hypervisor() { - #[cfg(mshv3)] - Some(HypervisorType::Mshv) => { - let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new( - entrypoint_ptr, - rsp_ptr, - pml4_ptr, - config, - #[cfg(gdb)] - gdb_conn, - #[cfg(crashdump)] - rt_cfg.clone(), - #[cfg(feature = "mem_profile")] - trace_info, - )?; - Box::new(hv) - } - - #[cfg(kvm)] - Some(HypervisorType::Kvm) => { - let hv = crate::hypervisor::kvm::KVMDriver::new( - pml4_ptr.absolute()?, - entrypoint_ptr.absolute()?, - rsp_ptr.absolute()?, - config, - #[cfg(gdb)] - gdb_conn, - #[cfg(crashdump)] - rt_cfg.clone(), - #[cfg(feature = "mem_profile")] - trace_info, - )?; - Box::new(hv) - } - + HyperlightVm::new( + regions, + pml4_ptr.absolute()?, + entrypoint_ptr.absolute()?, + rsp_ptr.absolute()?, + config, #[cfg(target_os = "windows")] - Some(HypervisorType::Whp) => { + { use crate::hypervisor::wrappers::HandleWrapper; - - let mmap_file_handle = mgr - .shared_mem - .with_exclusivity(|e| e.get_mmap_file_handle())?; - let hv = crate::hypervisor::hyperv_windows::HypervWindowsDriver::new( - regions, - mgr.shared_mem.raw_mem_size(), // we use raw_* here because windows driver requires 64K aligned addresses, - pml4_ptr.absolute()?, - entrypoint_ptr.absolute()?, - rsp_ptr.absolute()?, - HandleWrapper::from(mmap_file_handle), - #[cfg(gdb)] - gdb_conn, - #[cfg(crashdump)] - rt_cfg.clone(), - #[cfg(feature = "mem_profile")] - trace_info, - )?; - Box::new(hv) - } - - _ => { - log_then_return!(NoHypervisorFound()); - } - }; - - HyperlightVm::new(vm, regions, config) + use crate::mem::shared_mem::SharedMemory; + HandleWrapper::from( + mgr.shared_mem + .with_exclusivity(|s| s.get_mmap_file_handle())?, + ) + }, + #[cfg(target_os = "windows")] + { + use crate::mem::shared_mem::SharedMemory; + mgr.shared_mem.raw_mem_size() + }, + #[cfg(gdb)] + gdb_conn, + #[cfg(crashdump)] + rt_cfg.clone(), + #[cfg(feature = "mem_profile")] + trace_info, + ) } #[cfg(test)] From 09fdc1c550396181dd6e548bd026db90c3849ef0 Mon Sep 17 
00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Wed, 26 Nov 2025 12:58:31 -0800 Subject: [PATCH 4/4] Reorder to minimize diff Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/hyperv_linux.rs | 117 ++--- .../src/hypervisor/hyperv_windows.rs | 452 +++++++++--------- src/hyperlight_host/src/hypervisor/kvm.rs | 81 ++-- 3 files changed, 343 insertions(+), 307 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 968506816..8c6feb351 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -21,7 +21,8 @@ use std::sync::LazyLock; #[cfg(gdb)] use mshv_bindings::{DebugRegisters, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT}; use mshv_bindings::{ - hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, + FloatingPointUnit, SpecialRegisters, StandardRegisters, hv_message_type, + hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_partition_synthetic_processor_features, hv_register_assoc, @@ -32,7 +33,6 @@ use tracing::{Span, instrument}; #[cfg(gdb)] use crate::hypervisor::gdb::DebuggableVm; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; use crate::hypervisor::{HyperlightExit, Hypervisor}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::{Result, new_error}; @@ -90,41 +90,6 @@ impl MshvVm { } impl Hypervisor for MshvVm { - fn regs(&self) -> Result { - Ok((&self.vcpu_fd.get_regs()?).into()) - } - - fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { - Ok(self.vcpu_fd.set_regs(®s.into())?) 
- } - - fn sregs(&self) -> Result { - Ok((&self.vcpu_fd.get_sregs()?).into()) - } - - fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { - self.vcpu_fd.set_sregs(&sregs.into())?; - Ok(()) - } - - fn fpu(&self) -> Result { - Ok((&self.vcpu_fd.get_fpu()?).into()) - } - - fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { - self.vcpu_fd.set_fpu(&fpu.into())?; - Ok(()) - } - - #[cfg(crashdump)] - fn xsave(&self) -> Result> { - let xsave = self.vcpu_fd.get_xsave()?; - Ok(xsave.buffer.to_vec()) - } - - /// # Safety - /// The caller must ensure that the memory region is valid and points to valid memory, - /// and lives long enough for the VM to use it. unsafe fn map_memory(&mut self, (_slot, region): (u32, &MemoryRegion)) -> Result<()> { let mshv_region: mshv_user_mem_region = region.into(); self.vm_fd.map_user_memory(mshv_region)?; @@ -209,6 +174,45 @@ impl Hypervisor for MshvVm { }; Ok(result) } + + fn regs(&self) -> Result { + let mshv_regs = self.vcpu_fd.get_regs()?; + Ok((&mshv_regs).into()) + } + + fn set_regs(&self, regs: &super::regs::CommonRegisters) -> Result<()> { + let mshv_regs: StandardRegisters = regs.into(); + self.vcpu_fd.set_regs(&mshv_regs)?; + Ok(()) + } + + fn fpu(&self) -> Result { + let mshv_fpu = self.vcpu_fd.get_fpu()?; + Ok((&mshv_fpu).into()) + } + + fn set_fpu(&self, fpu: &super::regs::CommonFpu) -> Result<()> { + let mshv_fpu: FloatingPointUnit = fpu.into(); + self.vcpu_fd.set_fpu(&mshv_fpu)?; + Ok(()) + } + + fn sregs(&self) -> Result { + let mshv_sregs = self.vcpu_fd.get_sregs()?; + Ok((&mshv_sregs).into()) + } + + fn set_sregs(&self, sregs: &super::regs::CommonSpecialRegisters) -> Result<()> { + let mshv_sregs: SpecialRegisters = sregs.into(); + self.vcpu_fd.set_sregs(&mshv_sregs)?; + Ok(()) + } + + #[cfg(crashdump)] + fn xsave(&self) -> Result> { + let xsave = self.vcpu_fd.get_xsave()?; + Ok(xsave.buffer.to_vec()) + } } #[cfg(gdb)] @@ -229,37 +233,36 @@ impl DebuggableVm for MshvVm { fn set_debug(&mut self, enabled: bool) 
-> Result<()> { use mshv_bindings::{ - HV_INTERCEPT_ACCESS_MASK_EXECUTE, hv_intercept_parameters, - hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, mshv_install_intercept, + HV_INTERCEPT_ACCESS_MASK_EXECUTE, HV_INTERCEPT_ACCESS_MASK_NONE, + hv_intercept_parameters, hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, + mshv_install_intercept, }; use crate::hypervisor::gdb::arch::{BP_EX_ID, DB_EX_ID}; - if enabled { - self.vm_fd - .install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - // Exception handler #DB (1) - intercept_parameter: hv_intercept_parameters { - exception_vector: DB_EX_ID as u16, - }, - }) - .map_err(|e| new_error!("Cannot install debug exception intercept: {}", e))?; + let access_type_mask = if enabled { + HV_INTERCEPT_ACCESS_MASK_EXECUTE + } else { + HV_INTERCEPT_ACCESS_MASK_NONE + }; - // Install intercept for #BP (3) exception + for vector in [DB_EX_ID, BP_EX_ID] { self.vm_fd .install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_EXECUTE, + access_type_mask, intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_EXCEPTION, - // Exception handler #BP (3) intercept_parameter: hv_intercept_parameters { - exception_vector: BP_EX_ID as u16, + exception_vector: vector as u16, }, }) - .map_err(|e| new_error!("Cannot install breakpoint exception intercept: {}", e))?; - } else { - // There doesn't seem to be any way to remove installed intercepts. But that's okay. 
+ .map_err(|e| { + new_error!( + "Cannot {} exception intercept for vector {}: {}", + if enabled { "install" } else { "remove" }, + vector, + e + ) + })?; } Ok(()) } diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 9507de40b..e321f1970 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -120,26 +120,21 @@ impl WhpVm { }) } - /// Helper for setting arbitrary registers. - fn set_registers(&self, registers: &[(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)]) -> Result<()> { - let register_count = registers.len(); - - // Prepare register names (no special alignment needed) - let mut register_names = Vec::with_capacity(register_count); - let mut register_values = Vec::with_capacity(register_count); - - for (key, value) in registers.iter() { - register_names.push(*key); - register_values.push(Align16(*value)); - } + /// Helper for setting arbitrary registers. Makes sure the same number + /// of names and values are passed (at the expense of some performance). 
+ fn set_registers( + &self, + registers: &[(WHV_REGISTER_NAME, Align16)], + ) -> Result<()> { + let (names, values): (Vec<_>, Vec<_>) = registers.iter().copied().unzip(); unsafe { WHvSetVirtualProcessorRegisters( self.partition, 0, - register_names.as_ptr(), - register_count as u32, - register_values.as_ptr() as *const WHV_REGISTER_VALUE, + names.as_ptr(), + names.len() as u32, + values.as_ptr() as *const WHV_REGISTER_VALUE, // Casting Align16 away )?; } @@ -148,175 +143,6 @@ impl WhpVm { } impl Hypervisor for WhpVm { - /// Get the partition handle for this VM - fn partition_handle(&self) -> WHV_PARTITION_HANDLE { - self.partition - } - fn regs(&self) -> Result { - let mut whv_regs_values: [Align16; WHP_REGS_NAMES_LEN] = - unsafe { std::mem::zeroed() }; - - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - WHP_REGS_NAMES.as_ptr(), - whv_regs_values.len() as u32, - whv_regs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - WHP_REGS_NAMES - .into_iter() - .zip(whv_regs_values) - .collect::)>>() - .as_slice() - .try_into() - .map_err(|e| { - new_error!( - "Failed to convert WHP registers to CommonRegisters: {:?}", - e - ) - }) - } - - fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { - let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_REGS_NAMES_LEN] = - regs.into(); - let whp_regs_unaligned: Vec<(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)> = whp_regs - .iter() - .map(|(name, value)| (*name, value.0)) - .collect(); - self.set_registers(&whp_regs_unaligned)?; - Ok(()) - } - - fn sregs(&self) -> Result { - let mut whp_sregs_values: [Align16; WHP_SREGS_NAMES_LEN] = - unsafe { std::mem::zeroed() }; - - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - WHP_SREGS_NAMES.as_ptr(), - whp_sregs_values.len() as u32, - whp_sregs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - WHP_SREGS_NAMES - .into_iter() - .zip(whp_sregs_values) - .collect::)>>() - .as_slice() - .try_into() - .map_err(|e| { - new_error!( - 
"Failed to convert WHP registers to CommonSpecialRegisters: {:?}", - e - ) - }) - } - - fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { - let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = - sregs.into(); - let whp_regs_unaligned: Vec<(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)> = whp_regs - .iter() - .map(|(name, value)| (*name, value.0)) - .collect(); - self.set_registers(&whp_regs_unaligned)?; - Ok(()) - } - - fn fpu(&self) -> Result { - let mut whp_fpu_values: [Align16; WHP_FPU_NAMES_LEN] = - unsafe { std::mem::zeroed() }; - - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - WHP_FPU_NAMES.as_ptr(), - whp_fpu_values.len() as u32, - whp_fpu_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - WHP_FPU_NAMES - .into_iter() - .zip(whp_fpu_values) - .collect::)>>() - .as_slice() - .try_into() - .map_err(|e| new_error!("Failed to convert WHP registers to CommonFpu: {:?}", e)) - } - - fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { - let whp_fpu: [(WHV_REGISTER_NAME, Align16); WHP_FPU_NAMES_LEN] = - fpu.into(); - let whp_fpu_unaligned: Vec<(WHV_REGISTER_NAME, WHV_REGISTER_VALUE)> = whp_fpu - .iter() - .map(|(name, value)| (*name, value.0)) - .collect(); - self.set_registers(&whp_fpu_unaligned)?; - Ok(()) - } - - #[cfg(crashdump)] - fn xsave(&self) -> Result> { - use crate::HyperlightError; - - // Get the required buffer size by calling with NULL buffer. - // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns - // WHV_E_INSUFFICIENT_BUFFER and sets buffer_size_needed to the required size. 
- let mut buffer_size_needed: u32 = 0; - - let result = unsafe { - WHvGetVirtualProcessorXsaveState( - self.partition, - 0, - std::ptr::null_mut(), - 0, - &mut buffer_size_needed, - ) - }; - - // Expect insufficient buffer error; any other error is unexpected - if let Err(e) = result - && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER - { - return Err(HyperlightError::WindowsAPIError(e)); - } - - // Allocate buffer with the required size - let mut xsave_buffer = vec![0u8; buffer_size_needed as usize]; - let mut written_bytes = 0; - - // Get the actual Xsave state - unsafe { - WHvGetVirtualProcessorXsaveState( - self.partition, - 0, - xsave_buffer.as_mut_ptr() as *mut std::ffi::c_void, - buffer_size_needed, - &mut written_bytes, - ) - }?; - - // Verify the number of written bytes matches the expected size - if written_bytes != buffer_size_needed { - return Err(new_error!( - "Failed to get Xsave state: expected {} bytes, got {}", - buffer_size_needed, - written_bytes - )); - } - - Ok(xsave_buffer) - } - unsafe fn map_memory(&mut self, (_slot, region): (u32, &MemoryRegion)) -> Result<()> { // Only allow memory mapping during initial setup (the first batch of regions). 
// After the initial setup is complete, subsequent calls should fail, @@ -408,7 +234,10 @@ impl Hypervisor for WhpVm { WHvRunVpExitReasonX64IoPortAccess => unsafe { let instruction_length = exit_context.VpContext._bitfield & 0xF; let rip = exit_context.VpContext.Rip + instruction_length as u64; - self.set_registers(&[(WHvX64RegisterRip, WHV_REGISTER_VALUE { Reg64: rip })])?; + self.set_registers(&[( + WHvX64RegisterRip, + Align16(WHV_REGISTER_VALUE { Reg64: rip }), + )])?; HyperlightExit::IoOut( exit_context.Anonymous.IoPortAccess.PortNumber, exit_context @@ -470,10 +299,168 @@ impl Hypervisor for WhpVm { Ok(result) } + fn regs(&self) -> Result { + let mut whv_regs_values: [Align16; WHP_REGS_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_REGS_NAMES.as_ptr(), + whv_regs_values.len() as u32, + whv_regs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } + + WHP_REGS_NAMES + .into_iter() + .zip(whv_regs_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| { + new_error!( + "Failed to convert WHP registers to CommonRegisters: {:?}", + e + ) + }) + } + + fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { + let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_REGS_NAMES_LEN] = + regs.into(); + self.set_registers(&whp_regs)?; + Ok(()) + } + + fn fpu(&self) -> Result { + let mut whp_fpu_values: [Align16; WHP_FPU_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_FPU_NAMES.as_ptr(), + whp_fpu_values.len() as u32, + whp_fpu_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } + + WHP_FPU_NAMES + .into_iter() + .zip(whp_fpu_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| new_error!("Failed to convert WHP registers to CommonFpu: {:?}", e)) + } + + fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { + let whp_fpu: [(WHV_REGISTER_NAME, Align16); WHP_FPU_NAMES_LEN] = + fpu.into(); + 
self.set_registers(&whp_fpu)?; + Ok(()) + } + + fn sregs(&self) -> Result { + let mut whp_sregs_values: [Align16; WHP_SREGS_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_SREGS_NAMES.as_ptr(), + whp_sregs_values.len() as u32, + whp_sregs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } + + WHP_SREGS_NAMES + .into_iter() + .zip(whp_sregs_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| { + new_error!( + "Failed to convert WHP registers to CommonSpecialRegisters: {:?}", + e + ) + }) + } + + fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { + let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = + sregs.into(); + self.set_registers(&whp_regs)?; + Ok(()) + } + + #[cfg(crashdump)] + fn xsave(&self) -> Result> { + use crate::HyperlightError; + + // Get the required buffer size by calling with NULL buffer. + // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns + // WHV_E_INSUFFICIENT_BUFFER and sets buffer_size_needed to the required size. 
+ let mut buffer_size_needed: u32 = 0; + + let result = unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + std::ptr::null_mut(), + 0, + &mut buffer_size_needed, + ) + }; + + // Expect insufficient buffer error; any other error is unexpected + if let Err(e) = result + && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER + { + return Err(HyperlightError::WindowsAPIError(e)); + } + + // Allocate buffer with the required size + let mut xsave_buffer = vec![0u8; buffer_size_needed as usize]; + let mut written_bytes = 0; + + // Get the actual Xsave state + unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + xsave_buffer.as_mut_ptr() as *mut std::ffi::c_void, + buffer_size_needed, + &mut written_bytes, + ) + }?; + + // Verify the number of written bytes matches the expected size + if written_bytes != buffer_size_needed { + return Err(new_error!( + "Failed to get Xsave state: expected {} bytes, got {}", + buffer_size_needed, + written_bytes + )); + } + + Ok(xsave_buffer) + } + /// Mark that initial memory setup is complete. After this, map_memory will fail. 
fn complete_initial_memory_setup(&mut self) { self.initial_memory_setup_done = true; } + + /// Get the partition handle for this VM + fn partition_handle(&self) -> WHV_PARTITION_HANDLE { + self.partition + } } #[cfg(gdb)] @@ -501,30 +488,37 @@ impl DebuggableVm for WhpVm { } fn set_debug(&mut self, enable: bool) -> Result<()> { - if enable { - // Set the extended VM exits property to enable extended VM exits - let mut property: WHV_PARTITION_PROPERTY = Default::default(); - property.ExtendedVmExits.AsUINT64 = 1 << 2; // EXTENDED_VM_EXIT_POS - - unsafe { - WHvSetPartitionProperty( - self.partition, - WHvPartitionPropertyCodeExtendedVmExits, - &property as *const _ as *const c_void, - std::mem::size_of::() as u32, - )?; - } + let extended_vm_exits = if enable { 1 << 2 } else { 0 }; + let exception_exit_bitmap = if enable { + (1 << WHvX64ExceptionTypeDebugTrapOrFault.0) + | (1 << WHvX64ExceptionTypeBreakpointTrap.0) + } else { + 0 + }; - // Set the exception exit bitmap to include debug trap and breakpoint trap - let mut exception_property: WHV_PARTITION_PROPERTY = Default::default(); - exception_property.ExceptionExitBitmap = (1 << WHvX64ExceptionTypeDebugTrapOrFault.0) - | (1 << WHvX64ExceptionTypeBreakpointTrap.0); + let properties = [ + ( + WHvPartitionPropertyCodeExtendedVmExits, + WHV_PARTITION_PROPERTY { + ExtendedVmExits: WHV_EXTENDED_VM_EXITS { + AsUINT64: extended_vm_exits, + }, + }, + ), + ( + WHvPartitionPropertyCodeExceptionExitBitmap, + WHV_PARTITION_PROPERTY { + ExceptionExitBitmap: exception_exit_bitmap, + }, + ), + ]; + for (code, property) in properties { unsafe { WHvSetPartitionProperty( self.partition, - WHvPartitionPropertyCodeExceptionExitBitmap, - &exception_property as *const _ as *const c_void, + code, + &property as *const _ as *const c_void, std::mem::size_of::() as u32, )?; } @@ -592,11 +586,26 @@ impl DebuggableVm for WhpVm { // Set the debug registers let registers = vec![ - (WHvX64RegisterDr0, WHV_REGISTER_VALUE { Reg64: dr0 }), - 
(WHvX64RegisterDr1, WHV_REGISTER_VALUE { Reg64: dr1 }), - (WHvX64RegisterDr2, WHV_REGISTER_VALUE { Reg64: dr2 }), - (WHvX64RegisterDr3, WHV_REGISTER_VALUE { Reg64: dr3 }), - (WHvX64RegisterDr7, WHV_REGISTER_VALUE { Reg64: dr7 }), + ( + WHvX64RegisterDr0, + Align16(WHV_REGISTER_VALUE { Reg64: dr0 }), + ), + ( + WHvX64RegisterDr1, + Align16(WHV_REGISTER_VALUE { Reg64: dr1 }), + ), + ( + WHvX64RegisterDr2, + Align16(WHV_REGISTER_VALUE { Reg64: dr2 }), + ), + ( + WHvX64RegisterDr3, + Align16(WHV_REGISTER_VALUE { Reg64: dr3 }), + ), + ( + WHvX64RegisterDr7, + Align16(WHV_REGISTER_VALUE { Reg64: dr7 }), + ), ]; self.set_registers(®isters)?; Ok(()) @@ -641,11 +650,26 @@ impl DebuggableVm for WhpVm { // Set the debug registers let registers = vec![ - (WHvX64RegisterDr0, WHV_REGISTER_VALUE { Reg64: dr0 }), - (WHvX64RegisterDr1, WHV_REGISTER_VALUE { Reg64: dr1 }), - (WHvX64RegisterDr2, WHV_REGISTER_VALUE { Reg64: dr2 }), - (WHvX64RegisterDr3, WHV_REGISTER_VALUE { Reg64: dr3 }), - (WHvX64RegisterDr7, WHV_REGISTER_VALUE { Reg64: dr7 }), + ( + WHvX64RegisterDr0, + Align16(WHV_REGISTER_VALUE { Reg64: dr0 }), + ), + ( + WHvX64RegisterDr1, + Align16(WHV_REGISTER_VALUE { Reg64: dr1 }), + ), + ( + WHvX64RegisterDr2, + Align16(WHV_REGISTER_VALUE { Reg64: dr2 }), + ), + ( + WHvX64RegisterDr3, + Align16(WHV_REGISTER_VALUE { Reg64: dr3 }), + ), + ( + WHvX64RegisterDr7, + Align16(WHV_REGISTER_VALUE { Reg64: dr7 }), + ), ]; self.set_registers(®isters)?; Ok(()) diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index d80c100a4..037b60fc1 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -18,19 +18,19 @@ use std::sync::LazyLock; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; -use kvm_bindings::kvm_userspace_memory_region; +use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region}; use kvm_ioctls::Cap::UserMemory; use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; use 
tracing::{Span, instrument}; #[cfg(gdb)] use crate::hypervisor::gdb::DebuggableVm; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; use crate::hypervisor::{HyperlightExit, Hypervisor}; use crate::mem::memory_region::MemoryRegion; use crate::{Result, new_error}; /// Return `true` if the KVM API is available, version 12, and has UserMemory capability, or `false` otherwise +#[instrument(skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn is_hypervisor_present() -> bool { if let Ok(kvm) = Kvm::new() { let api_version = kvm.get_api_version(); @@ -85,40 +85,6 @@ impl KvmVm { } impl Hypervisor for KvmVm { - fn regs(&self) -> Result { - Ok((&self.vcpu_fd.get_regs()?).into()) - } - - fn set_regs(&self, regs: &CommonRegisters) -> Result<()> { - Ok(self.vcpu_fd.set_regs(®s.into())?) - } - - fn sregs(&self) -> Result { - Ok((&self.vcpu_fd.get_sregs()?).into()) - } - - fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()> { - Ok(self.vcpu_fd.set_sregs(&sregs.into())?) - } - - fn fpu(&self) -> Result { - Ok((&self.vcpu_fd.get_fpu()?).into()) - } - - fn set_fpu(&self, fpu: &CommonFpu) -> Result<()> { - Ok(self.vcpu_fd.set_fpu(&fpu.into())?) 
- } - - #[cfg(crashdump)] - fn xsave(&self) -> Result> { - let xsave = self.vcpu_fd.get_xsave()?; - Ok(xsave - .region - .into_iter() - .flat_map(u32::to_le_bytes) - .collect()) - } - unsafe fn map_memory(&mut self, (slot, region): (u32, &MemoryRegion)) -> Result<()> { let mut kvm_region: kvm_userspace_memory_region = region.into(); kvm_region.slot = slot; @@ -163,6 +129,49 @@ impl Hypervisor for KvmVm { ))), } } + + fn regs(&self) -> Result { + let kvm_regs = self.vcpu_fd.get_regs()?; + Ok((&kvm_regs).into()) + } + + fn set_regs(&self, regs: &super::regs::CommonRegisters) -> Result<()> { + let kvm_regs: kvm_regs = regs.into(); + self.vcpu_fd.set_regs(&kvm_regs)?; + Ok(()) + } + + fn fpu(&self) -> Result { + let kvm_fpu = self.vcpu_fd.get_fpu()?; + Ok((&kvm_fpu).into()) + } + + fn set_fpu(&self, fpu: &super::regs::CommonFpu) -> Result<()> { + let kvm_fpu: kvm_fpu = fpu.into(); + self.vcpu_fd.set_fpu(&kvm_fpu)?; + Ok(()) + } + + fn sregs(&self) -> Result { + let kvm_sregs = self.vcpu_fd.get_sregs()?; + Ok((&kvm_sregs).into()) + } + + fn set_sregs(&self, sregs: &super::regs::CommonSpecialRegisters) -> Result<()> { + let kvm_sregs: kvm_sregs = sregs.into(); + self.vcpu_fd.set_sregs(&kvm_sregs)?; + Ok(()) + } + + #[cfg(crashdump)] + fn xsave(&self) -> Result> { + let xsave = self.vcpu_fd.get_xsave()?; + Ok(xsave + .region + .into_iter() + .flat_map(u32::to_le_bytes) + .collect()) + } } #[cfg(gdb)]