diff --git a/pinchy-common/src/kernel_types.rs b/pinchy-common/src/kernel_types.rs index d5567d4..66d3051 100644 --- a/pinchy-common/src/kernel_types.rs +++ b/pinchy-common/src/kernel_types.rs @@ -27,6 +27,9 @@ pub const IOCB_FLAG_IOPRIO: u32 = 1 << 1; // aio_reqprio is valid pub const AIO_IOCB_ARRAY_CAP: usize = 4; pub const AIO_EVENT_ARRAY_CAP: usize = 4; +// Constants for kexec_load +pub const KEXEC_SEGMENT_ARRAY_CAP: usize = 16; + #[repr(C)] #[derive(Debug, Default, Copy, Clone)] pub struct Pollfd { @@ -886,3 +889,14 @@ pub struct SeccompNotifSizes { pub seccomp_notif_resp: u16, // Size of response structure pub seccomp_data: u16, // Size of 'struct seccomp_data' } + +/// Kernel memory segment descriptor for kexec_load +/// See: https://man7.org/linux/man-pages/man2/kexec_load.2.html +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct KexecSegment { + pub buf: u64, // Buffer in user space (void* as u64) + pub bufsz: u64, // Buffer length in user space (size_t as u64) + pub mem: u64, // Physical address of kernel (void* as u64) + pub memsz: u64, // Physical address length (size_t as u64) +} diff --git a/pinchy-common/src/lib.rs b/pinchy-common/src/lib.rs index e73538f..45bb61a 100644 --- a/pinchy-common/src/lib.rs +++ b/pinchy-common/src/lib.rs @@ -412,6 +412,8 @@ pub union SyscallEventData { pub pkey_mprotect: PkeyMprotectData, pub mseal: MsealData, pub remap_file_pages: RemapFilePagesData, + pub restart_syscall: RestartSyscallData, + pub kexec_load: KexecLoadData, } #[repr(C)] @@ -3405,3 +3407,20 @@ pub struct RemapFilePagesData { pub pgoff: u64, pub flags: i32, } + +#[repr(C)] +#[derive(Clone, Copy, Default)] +pub struct RestartSyscallData { + // No arguments +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +pub struct KexecLoadData { + pub entry: u64, + pub nr_segments: u64, + pub segments: u64, + pub flags: u64, + pub segments_read: u64, + pub parsed_segments: [kernel_types::KexecSegment; kernel_types::KEXEC_SEGMENT_ARRAY_CAP], +} diff --git 
a/pinchy-ebpf/src/system.rs b/pinchy-ebpf/src/system.rs index 43d8773..6f6b702 100644 --- a/pinchy-ebpf/src/system.rs +++ b/pinchy-ebpf/src/system.rs @@ -467,6 +467,39 @@ pub fn syscall_exit_system(ctx: TracePointContext) -> u32 { data.bufp = args[1] as u64; data.size = args[2] as i32; } + syscalls::SYS_restart_syscall => { + let _data = data_mut!(entry, restart_syscall); + // No arguments to capture + } + syscalls::SYS_kexec_load => { + let data = data_mut!(entry, kexec_load); + data.entry = args[0] as u64; + data.nr_segments = args[1] as u64; + data.segments = args[2] as u64; + data.flags = args[3] as u64; + + let segments_ptr = args[2] as *const kernel_types::KexecSegment; + + if !segments_ptr.is_null() && data.nr_segments > 0 { + let max_to_read = core::cmp::min( + kernel_types::KEXEC_SEGMENT_ARRAY_CAP, + data.nr_segments as usize, + ); + + for i in 0..max_to_read { + unsafe { + let ptr = segments_ptr.add(i); + + if let Ok(segment) = bpf_probe_read_user(ptr) { + data.parsed_segments[i] = segment; + data.segments_read += 1; + } else { + break; + } + } + } + } + } _ => { entry.discard(); return Ok(()); diff --git a/pinchy/src/events.rs b/pinchy/src/events.rs index 60e43eb..7c2f3e7 100644 --- a/pinchy/src/events.rs +++ b/pinchy/src/events.rs @@ -4417,6 +4417,52 @@ pub async fn handle_event(event: &SyscallEvent, formatter: Formatter<'_>) -> any finish!(sf, event.return_value); } + syscalls::SYS_restart_syscall => { + let _data = unsafe { event.data.restart_syscall }; + + finish!(sf, event.return_value); + } + syscalls::SYS_kexec_load => { + let data = unsafe { event.data.kexec_load }; + + argf!(sf, "entry: 0x{:x}", data.entry); + argf!(sf, "nr_segments: {}", data.nr_segments); + + if data.segments_read > 0 { + let segments: Vec = data.parsed_segments[..data.segments_read as usize] + .iter() + .map(|seg| { + format!( + "{{buf: 0x{:x}, bufsz: {}, mem: 0x{:x}, memsz: {}}}", + seg.buf, seg.bufsz, seg.mem, seg.memsz + ) + }) + .collect(); + + if data.segments_read < 
/// Flag and architecture values accepted by `kexec_load(2)`, mirroring
/// `linux/kexec.h`.
pub mod kexec_constants {
    /// kexec_load flags from linux/kexec.h
    pub const KEXEC_ON_CRASH: u64 = 0x00000001;
    pub const KEXEC_PRESERVE_CONTEXT: u64 = 0x00000002;
    pub const KEXEC_UPDATE_ELFCOREHDR: u64 = 0x00000004;
    pub const KEXEC_CRASH_HOTPLUG_SUPPORT: u64 = 0x00000008;
    /// Bits of the flags word that carry the architecture identifier.
    pub const KEXEC_ARCH_MASK: u64 = 0xffff0000;

    /// Architecture constants (an identifier shifted into the bits covered
    /// by `KEXEC_ARCH_MASK`).
    pub const KEXEC_ARCH_DEFAULT: u64 = 0 << 16;
    pub const KEXEC_ARCH_386: u64 = 3 << 16;
    pub const KEXEC_ARCH_68K: u64 = 4 << 16;
    pub const KEXEC_ARCH_PARISC: u64 = 15 << 16;
    pub const KEXEC_ARCH_X86_64: u64 = 62 << 16;
    pub const KEXEC_ARCH_PPC: u64 = 20 << 16;
    pub const KEXEC_ARCH_PPC64: u64 = 21 << 16;
    pub const KEXEC_ARCH_IA_64: u64 = 50 << 16;
    pub const KEXEC_ARCH_ARM: u64 = 40 << 16;
    pub const KEXEC_ARCH_S390: u64 = 22 << 16;
    pub const KEXEC_ARCH_SH: u64 = 42 << 16;
    pub const KEXEC_ARCH_MIPS_LE: u64 = 10 << 16;
    pub const KEXEC_ARCH_MIPS: u64 = 8 << 16;
    pub const KEXEC_ARCH_AARCH64: u64 = 183 << 16;
    pub const KEXEC_ARCH_RISCV: u64 = 243 << 16;
    pub const KEXEC_ARCH_LOONGARCH: u64 = 258 << 16;
}

/// Renders the `flags` argument of `kexec_load(2)` for display.
///
/// Returns `"0"` when no bit is set. Otherwise returns
/// `0x<hex> (<NAME>|<NAME>|...)`, where the parenthesised list contains, in
/// this order:
///
/// 1. the name of every known flag bit that is set,
/// 2. the architecture selected by the `KEXEC_ARCH_MASK` bits, either by its
///    symbolic name or as `KEXEC_ARCH_<number>` when the value is not one of
///    the known constants (nothing is emitted when those bits are all zero,
///    i.e. `KEXEC_ARCH_DEFAULT`),
/// 3. any remaining set bits that match neither a known flag nor the
///    architecture mask, printed as a single hexadecimal value so they are
///    never silently dropped.
pub fn format_kexec_load_flags(flags: u64) -> Cow<'static, str> {
    use kexec_constants as k;

    // Single-bit flags, in the order they are reported.
    const FLAG_NAMES: &[(u64, &str)] = &[
        (k::KEXEC_ON_CRASH, "KEXEC_ON_CRASH"),
        (k::KEXEC_PRESERVE_CONTEXT, "KEXEC_PRESERVE_CONTEXT"),
        (k::KEXEC_UPDATE_ELFCOREHDR, "KEXEC_UPDATE_ELFCOREHDR"),
        (k::KEXEC_CRASH_HOTPLUG_SUPPORT, "KEXEC_CRASH_HOTPLUG_SUPPORT"),
    ];

    // Known non-zero architecture values. KEXEC_ARCH_DEFAULT is zero and is
    // therefore indistinguishable from "no architecture bits set"; it is
    // deliberately not listed.
    const ARCH_NAMES: &[(u64, &str)] = &[
        (k::KEXEC_ARCH_386, "KEXEC_ARCH_386"),
        (k::KEXEC_ARCH_68K, "KEXEC_ARCH_68K"),
        (k::KEXEC_ARCH_PARISC, "KEXEC_ARCH_PARISC"),
        (k::KEXEC_ARCH_X86_64, "KEXEC_ARCH_X86_64"),
        (k::KEXEC_ARCH_PPC, "KEXEC_ARCH_PPC"),
        (k::KEXEC_ARCH_PPC64, "KEXEC_ARCH_PPC64"),
        (k::KEXEC_ARCH_IA_64, "KEXEC_ARCH_IA_64"),
        (k::KEXEC_ARCH_ARM, "KEXEC_ARCH_ARM"),
        (k::KEXEC_ARCH_S390, "KEXEC_ARCH_S390"),
        (k::KEXEC_ARCH_SH, "KEXEC_ARCH_SH"),
        (k::KEXEC_ARCH_MIPS_LE, "KEXEC_ARCH_MIPS_LE"),
        (k::KEXEC_ARCH_MIPS, "KEXEC_ARCH_MIPS"),
        (k::KEXEC_ARCH_AARCH64, "KEXEC_ARCH_AARCH64"),
        (k::KEXEC_ARCH_RISCV, "KEXEC_ARCH_RISCV"),
        (k::KEXEC_ARCH_LOONGARCH, "KEXEC_ARCH_LOONGARCH"),
    ];

    if flags == 0 {
        return Cow::Borrowed("0");
    }

    let mut parts: Vec<Cow<'static, str>> = Vec::new();

    // Start from every bit outside the architecture field and clear each one
    // we can name; whatever is left afterwards is reported verbatim.
    let mut unknown = flags & !k::KEXEC_ARCH_MASK;

    for &(bit, name) in FLAG_NAMES {
        if flags & bit != 0 {
            parts.push(Cow::Borrowed(name));
            unknown &= !bit;
        }
    }

    let arch = flags & k::KEXEC_ARCH_MASK;

    if arch != 0 {
        match ARCH_NAMES.iter().find(|&&(value, _)| value == arch) {
            Some(&(_, name)) => parts.push(Cow::Borrowed(name)),
            None => parts.push(Cow::Owned(format!("KEXEC_ARCH_{}", arch >> 16))),
        }
    }

    if unknown != 0 {
        parts.push(Cow::Owned(format!("0x{unknown:x}")));
    }

    Cow::Owned(format!("0x{:x} ({})", flags, parts.join("|")))
}
SYS_sysinfo, SYS_syslog, SYS_times, SYS_umask, - SYS_uname, SYS_vhangup, + SYS_perf_event_open, SYS_personality, SYS_reboot, SYS_request_key, SYS_restart_syscall, + SYS_setdomainname, SYS_sethostname, SYS_settimeofday, SYS_sync, SYS_sysinfo, SYS_syslog, + SYS_times, SYS_umask, SYS_uname, SYS_vhangup, }, AddKeyData, BpfData, CapsetgetData, ClockNanosleepData, DeleteModuleData, ExitGroupData, FinitModuleData, GetcpuData, GetrandomData, GettimeofdayData, InitModuleData, IoctlData, - IoprioGetData, IoprioSetData, KeyctlData, LandlockAddRuleData, LandlockCreateRulesetData, - LandlockRestrictSelfData, NanosleepData, PerfEventOpenData, PersonalityData, RebootData, - RequestKeyData, RtSigreturnData, SetdomainnameData, SethostnameData, SettimeofdayData, - SyncData, SyscallEvent, SyscallEventData, SysinfoData, SyslogData, TimesData, UmaskData, - UnameData, VhangupData, + IoprioGetData, IoprioSetData, KexecLoadData, KeyctlData, LandlockAddRuleData, + LandlockCreateRulesetData, LandlockRestrictSelfData, NanosleepData, PerfEventOpenData, + PersonalityData, RebootData, RequestKeyData, RestartSyscallData, RtSigreturnData, + SetdomainnameData, SethostnameData, SettimeofdayData, SyncData, SyscallEvent, SyscallEventData, + SysinfoData, SyslogData, TimesData, UmaskData, UnameData, VhangupData, }; use crate::syscall_test; @@ -1901,3 +1901,128 @@ syscall_test!( }, "9102 syslog(type: SYSLOG_ACTION_READ, bufp: 0x0, size: 0) = -1 (error)\n" ); + +syscall_test!( + parse_restart_syscall, + { + SyscallEvent { + syscall_nr: SYS_restart_syscall, + pid: 123, + tid: 123, + return_value: 0, + data: SyscallEventData { + restart_syscall: RestartSyscallData::default(), + }, + } + }, + "123 restart_syscall() = 0 (success)\n" +); + +syscall_test!( + parse_kexec_load_basic, + { + SyscallEvent { + syscall_nr: SYS_kexec_load, + pid: 123, + tid: 123, + return_value: 0, + data: SyscallEventData { + kexec_load: KexecLoadData { + entry: 0x80000000, + nr_segments: 4, + segments: 0x7fff0000, + flags: 0, + 
segments_read: 0, + parsed_segments: Default::default(), + }, + }, + } + }, + "123 kexec_load(entry: 0x80000000, nr_segments: 4, segments: 0x7fff0000, flags: 0) = 0 (success)\n" +); + +syscall_test!( + parse_kexec_load_on_crash, + { + SyscallEvent { + syscall_nr: SYS_kexec_load, + pid: 123, + tid: 123, + return_value: 0, + data: SyscallEventData { + kexec_load: KexecLoadData { + entry: 0x80000000, + nr_segments: 2, + segments: 0x7fff0000, + flags: crate::format_helpers::kexec_constants::KEXEC_ON_CRASH, + segments_read: 0, + parsed_segments: Default::default(), + }, + }, + } + }, + "123 kexec_load(entry: 0x80000000, nr_segments: 2, segments: 0x7fff0000, flags: 0x1 (KEXEC_ON_CRASH)) = 0 (success)\n" +); + +syscall_test!( + parse_kexec_load_with_arch, + { + SyscallEvent { + syscall_nr: SYS_kexec_load, + pid: 123, + tid: 123, + return_value: 0, + data: SyscallEventData { + kexec_load: KexecLoadData { + entry: 0x80000000, + nr_segments: 3, + segments: 0x7fff0000, + flags: crate::format_helpers::kexec_constants::KEXEC_ON_CRASH + | crate::format_helpers::kexec_constants::KEXEC_ARCH_X86_64, + segments_read: 0, + parsed_segments: Default::default(), + }, + }, + } + }, + "123 kexec_load(entry: 0x80000000, nr_segments: 3, segments: 0x7fff0000, flags: 0x3e0001 (KEXEC_ON_CRASH|KEXEC_ARCH_X86_64)) = 0 (success)\n" +); + +syscall_test!( + parse_kexec_load_with_segments, + { + use pinchy_common::kernel_types::KexecSegment; + + let mut parsed_segments = [KexecSegment::default(); 16]; + parsed_segments[0] = KexecSegment { + buf: 0x1000, + bufsz: 4096, + mem: 0x100000, + memsz: 4096, + }; + parsed_segments[1] = KexecSegment { + buf: 0x2000, + bufsz: 8192, + mem: 0x200000, + memsz: 8192, + }; + + SyscallEvent { + syscall_nr: SYS_kexec_load, + pid: 123, + tid: 123, + return_value: 0, + data: SyscallEventData { + kexec_load: KexecLoadData { + entry: 0x80000000, + nr_segments: 2, + segments: 0x7fff0000, + flags: 0, + segments_read: 2, + parsed_segments, + }, + }, + } + }, + "123 
kexec_load(entry: 0x80000000, nr_segments: 2, segments: [{buf: 0x1000, bufsz: 4096, mem: 0x100000, memsz: 4096}, {buf: 0x2000, bufsz: 8192, mem: 0x200000, memsz: 8192}], flags: 0) = 0 (success)\n" +);