From e9fa794d6274f6e8887985ef24a6b8ba93d634bd Mon Sep 17 00:00:00 2001 From: Christiano Haesbaert Date: Wed, 26 Jun 2024 13:49:32 +0200 Subject: [PATCH] Fetch everything, everywhere, all at once. Issue #45 This diff unifies the tracking of task_struct{} across all probes and ebpf events. We now update ~all task_struct values everywhere we can. Now exit and exec embed a raw_task{} into their own raw_type and functions that operate solely on raw_task{} have been unified so we stop repeating code. TASK_SAMPLE takes a parameter for the register, as we may have to track from another register in the future. Now samples can include TASK_SAMPLE, like exec_connector does. This creates an issue as before we were creating the wire protocol carefully aligned by making sure it's all ordered 64->32->16->8 and so on. If we place TASK_SAMPLE in the beginning of the sample, we might end up unaligned, but if we add it in the end, then a task_sample stops being standalone as it defines `probe_ip` like all other samples. We solve this by defining an aligned/non-padded task_sample, and we enforce this via #pragma forbidden magic. Luckily it is aligned in its current state; if we add more members in the future, we may manually pad it with additional members. We also start getting ppid from the wire instead of hardcoding it, that's because ppid might change either when a process daemonizes or when docker is doing its magic to start a container, which can change ppid multiple times via PR_SET_CHILD_SUBREAPER blood magic. See issue #43.
This PR is pending on the merge of https://github.com/elastic/ebpf/pull/197 --- bpf_queue.c | 102 +++++----- btf.c | 1 + elastic-ebpf/GPL/Events/EbpfEventProto.h | 3 +- elastic-ebpf/GPL/Events/Process/Probe.bpf.c | 6 + kprobe_defs.h | 149 ++++++--------- kprobe_queue.c | 196 +++++++++----------- quark.c | 115 +++++------- quark.h | 28 +-- 8 files changed, 266 insertions(+), 334 deletions(-) diff --git a/bpf_queue.c b/bpf_queue.c index 02dfc4a..9ae43a9 100644 --- a/bpf_queue.c +++ b/bpf_queue.c @@ -33,34 +33,40 @@ libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) return (0); } +struct ebpf_ctx { + struct ebpf_pid_info *pids; + struct ebpf_cred_info *creds; + struct ebpf_tty_dev *ctty; + char *comm; + char *cwd; +}; + static void -ebpf_events_to_task(struct ebpf_pid_info *pids, struct ebpf_cred_info *creds, - struct ebpf_tty_dev *tty, struct raw_task *task, u32 *pid) +ebpf_ctx_to_task(struct ebpf_ctx *ebpf_ctx, struct raw_task *task) { - *pid = pids->tid; - task->ppid = pids->ppid; - task->start_boottime = pids->start_time_ns; /* XXX check format */ task->cap_inheritable = 0; /* unavailable */ - task->cap_permitted = creds->cap_permitted; - task->cap_effective = creds->cap_effective; + task->cap_permitted = ebpf_ctx->creds->cap_permitted; + task->cap_effective = ebpf_ctx->creds->cap_effective; task->cap_bset = 0; /* unavailable */ task->cap_ambient = 0; /* unavailable */ - task->uid = creds->ruid; - task->gid = creds->rgid; - task->suid = creds->suid; - task->sgid = creds->sgid; - task->euid = creds->euid; - task->egid = creds->egid; - task->pgid = pids->pgid; - task->sid = pids->sid; - if (tty != NULL) { - task->tty_major = tty->major; - task->tty_minor = tty->minor; - } else { - task->tty_major = 0; - task->tty_minor = 0; - } - task->exit_time_event = 0; + task->start_boottime = ebpf_ctx->pids->start_time_ns; /* XXX check format */ + task->uid = ebpf_ctx->creds->ruid; + task->gid = ebpf_ctx->creds->rgid; + task->suid = 
ebpf_ctx->creds->suid; + task->sgid = ebpf_ctx->creds->sgid; + task->euid = ebpf_ctx->creds->euid; + task->egid = ebpf_ctx->creds->egid; + task->pgid = ebpf_ctx->pids->pgid; + task->sid = ebpf_ctx->pids->sid; + task->ppid = ebpf_ctx->pids->ppid; + /* skip exit_* */ + task->tty_major = ebpf_ctx->ctty->major; + task->tty_minor = ebpf_ctx->ctty->minor; + if (ebpf_ctx->cwd != NULL) + qstr_strcpy(&task->cwd, ebpf_ctx->cwd); + else + qstr_strcpy(&task->cwd, "(invalid)"); + strlcpy(task->comm, ebpf_ctx->comm, sizeof(task->comm)); } static struct raw_event * @@ -71,7 +77,9 @@ ebpf_events_to_raw(struct ebpf_event_header *ev) struct ebpf_process_exit_event *exit; struct ebpf_process_exec_event *exec; struct ebpf_varlen_field *field; + struct ebpf_ctx ebpf_ctx; + bzero(&ebpf_ctx, sizeof(ebpf_ctx)); raw = NULL; switch (ev->type) { @@ -81,19 +89,25 @@ ebpf_events_to_raw(struct ebpf_event_header *ev) goto bad; if ((raw = raw_event_alloc(RAW_WAKE_UP_NEW_TASK)) == NULL) goto bad; + raw->pid = fork->child_pids.tid; raw->time = ev->ts; - ebpf_events_to_task(&fork->child_pids, &fork->creds, &fork->ctty, - &raw->task, &raw->pid); + ebpf_ctx.pids = &fork->child_pids; + ebpf_ctx.creds = &fork->creds; + ebpf_ctx.ctty = &fork->ctty; + ebpf_ctx.comm = fork->comm; + ebpf_ctx.cwd = NULL; /* the macro doesn't take a pointer so we can't pass down :) */ FOR_EACH_VARLEN_FIELD(fork->vl_fields, field) { switch (field->type) { case EBPF_VL_FIELD_CWD: - qstr_strcpy(&raw->task.cwd, field->data); + ebpf_ctx.cwd = field->data; break; default: break; } } + ebpf_ctx_to_task(&ebpf_ctx, &raw->task); + break; case EBPF_EVENT_PROCESS_EXIT: exit = (struct ebpf_process_exit_event *)ev; @@ -101,36 +115,35 @@ ebpf_events_to_raw(struct ebpf_event_header *ev) goto bad; if ((raw = raw_event_alloc(RAW_EXIT_THREAD)) == NULL) goto bad; + raw->pid = exit->pids.tid; raw->time = ev->ts; - ebpf_events_to_task(&exit->pids, &exit->creds, NULL, - &raw->task, &raw->pid); + ebpf_ctx.pids = &exit->pids; + ebpf_ctx.creds = 
&exit->creds; + ebpf_ctx.ctty = &exit->ctty; + ebpf_ctx.comm = exit->comm; + ebpf_ctx.cwd = NULL; raw->task.exit_code = exit->exit_code; raw->task.exit_time_event = raw->time; - /* the macro doesn't take a pointer so we can't pass down :) */ - FOR_EACH_VARLEN_FIELD(exit->vl_fields, field) { - switch (field->type) { - case EBPF_VL_FIELD_CWD: - qstr_strcpy(&raw->task.cwd, field->data); - break; - default: - break; - } - } + ebpf_ctx_to_task(&ebpf_ctx, &raw->task); + break; case EBPF_EVENT_PROCESS_EXEC: exec = (struct ebpf_process_exec_event *)ev; if ((raw = raw_event_alloc(RAW_EXEC)) == NULL) goto bad; + raw->pid = exec->pids.tid; raw->time = ev->ts; raw->exec.flags |= RAW_EXEC_F_EXT; - ebpf_events_to_task(&exec->pids, &exec->creds, &exec->ctty, - &raw->exec.ext.task, &raw->pid); - strlcpy(raw->exec.ext.comm, exec->comm, - sizeof(raw->exec.ext.comm)); + ebpf_ctx.pids = &exec->pids; + ebpf_ctx.creds = &exec->creds; + ebpf_ctx.ctty = &exec->ctty; + ebpf_ctx.comm = exec->comm; + ebpf_ctx.cwd = NULL; + FOR_EACH_VARLEN_FIELD(exec->vl_fields, field) { switch (field->type) { case EBPF_VL_FIELD_CWD: - qstr_strcpy(&raw->exec.ext.task.cwd, field->data); + ebpf_ctx.cwd = field->data; break; case EBPF_VL_FIELD_FILENAME: qstr_strcpy(&raw->exec.filename, field->data); @@ -149,6 +162,8 @@ ebpf_events_to_raw(struct ebpf_event_header *ev) break; } } + ebpf_ctx_to_task(&ebpf_ctx, &raw->exec.ext.task); + break; default: warnx("%s unhandled type %lu", __func__, ev->type); @@ -160,6 +175,7 @@ ebpf_events_to_raw(struct ebpf_event_header *ev) bad: if (raw != NULL) raw_event_free(raw); + return (NULL); } diff --git a/btf.c b/btf.c index 5434c71..cb908ff 100644 --- a/btf.c +++ b/btf.c @@ -42,6 +42,7 @@ struct quark_btf_target targets[] = { { "task_struct.mm", -1 }, { "task_struct.pid", -1 }, { "task_struct.pids", -1 }, + { "task_struct.real_parent", -1 }, { "task_struct.start_boottime", -1 }, /* or task_struct.real_start_time */ { "task_struct.signal", -1 }, /* or task_struct.pids via KLUDGE 
*/ { "task_struct.tgid", -1 }, diff --git a/elastic-ebpf/GPL/Events/EbpfEventProto.h b/elastic-ebpf/GPL/Events/EbpfEventProto.h index 9156b4e..d858832 100644 --- a/elastic-ebpf/GPL/Events/EbpfEventProto.h +++ b/elastic-ebpf/GPL/Events/EbpfEventProto.h @@ -241,8 +241,9 @@ struct ebpf_process_exit_event { struct ebpf_event_header hdr; struct ebpf_pid_info pids; struct ebpf_cred_info creds; - int32_t exit_code; + struct ebpf_tty_dev ctty; char comm[TASK_COMM_LEN]; + int32_t exit_code; // Variable length fields: pids_ss_cgroup_path struct ebpf_varlen_fields_start vl_fields; diff --git a/elastic-ebpf/GPL/Events/Process/Probe.bpf.c b/elastic-ebpf/GPL/Events/Process/Probe.bpf.c index 243abc2..00b5cb0 100644 --- a/elastic-ebpf/GPL/Events/Process/Probe.bpf.c +++ b/elastic-ebpf/GPL/Events/Process/Probe.bpf.c @@ -70,6 +70,11 @@ int BPF_PROG(sched_process_fork, const struct task_struct *parent, const struct size = ebpf_resolve_pids_ss_cgroup_path_to_string(field->data, child); ebpf_vl_field__set_size(&event->vl_fields, field, size); + // cwd + field = ebpf_vl_field__add(&event->vl_fields, EBPF_VL_FIELD_CWD); + size = ebpf_resolve_path_to_string(field->data, &child->fs->pwd, child); + ebpf_vl_field__set_size(&event->vl_fields, field, size); + bpf_ringbuf_output(&ringbuf, event, EVENT_SIZE(event), 0); out: @@ -201,6 +206,7 @@ static int taskstats_exit__enter(const struct task_struct *task, int group_dead) event->exit_code = (exit_code >> 8) & 0xFF; ebpf_pid_info__fill(&event->pids, task); ebpf_cred_info__fill(&event->creds, task); + ebpf_ctty__fill(&event->ctty, task); ebpf_comm__fill(event->comm, sizeof(event->comm), task); // Variable length fields diff --git a/kprobe_defs.h b/kprobe_defs.h index 453837d..2511953 100644 --- a/kprobe_defs.h +++ b/kprobe_defs.h @@ -18,9 +18,6 @@ #define XS(_a) S(_a) #define PWD_K(_t, _o) "task_struct.fs fs_struct.pwd.dentry " XS(RPT(_t, _o, dentry.d_parent)) #define PWD_S(_t, _o) "task_struct.fs fs_struct.pwd.dentry " XS(RPT(_t, _o, 
dentry.d_parent)) " dentry.d_name.name +0" -#define TTY_MAJOR "task_struct.signal signal_struct.tty tty_struct.driver tty_driver.major" -#define TTY_MINOR_START "task_struct.signal signal_struct.tty tty_struct.driver tty_driver.minor_start" -#define TTY_MINOR_INDEX "task_struct.signal signal_struct.tty tty_struct.index" struct kprobe_arg ka_task_old_pgid = { "pgid", "di", "u32", "task_struct.group_leader (task_struct.pids+8) (pid.numbers+0).upid.nr" @@ -39,57 +36,60 @@ struct kprobe_arg ka_task_new_sid = { }; -#define TASK_SAMPLE { \ - { "cap_inheritable", "di", "u64", "task_struct.cred cred.cap_inheritable" }, \ - { "cap_permitted", "di", "u64", "task_struct.cred cred.cap_permitted", }, \ - { "cap_effective", "di", "u64", "task_struct.cred cred.cap_effective" }, \ - { "cap_bset", "di", "u64", "task_struct.cred cred.cap_bset" }, \ - { "cap_ambient", "di", "u64", "task_struct.cred cred.cap_ambient" }, \ - { "start_boottime", "di", "u64", "task_struct.start_boottime" }, \ - { "tty_addr", "di", "u64", "task_struct.signal signal_struct.tty" }, \ - { "root_k", "di", "u64", "task_struct.fs fs_struct.root.dentry" }, \ - { "mnt_root_k", "di", "u64", "task_struct.fs fs_struct.pwd.mnt vfsmount.mnt_root" }, \ - { "mnt_mountpoint_k", "di", "u64", "task_struct.fs fs_struct.pwd.mnt (mount.mnt_mountpoint-mount.mnt)" }, \ - { "pwd_k0", "di", "u64", PWD_K(0, 0) }, \ - { "pwd_k1", "di", "u64", PWD_K(0, 1) }, \ - { "pwd_k2", "di", "u64", PWD_K(0, 2) }, \ - { "pwd_k3", "di", "u64", PWD_K(0, 3) }, \ - { "pwd_k4", "di", "u64", PWD_K(0, 4) }, \ - { "pwd_k5", "di", "u64", PWD_K(0, 5) }, \ - { "pwd_k6", "di", "u64", PWD_K(0, 6) }, \ - { "root_s", "di", "string", "task_struct.fs fs_struct.root.dentry dentry.d_name.name +0" }, \ - { "mnt_root_s", "di", "string", "task_struct.fs fs_struct.pwd.mnt vfsmount.mnt_root dentry.d_name.name +0" }, \ - { "mnt_mountpoint_s", "di", "string", "task_struct.fs fs_struct.pwd.mnt (mount.mnt_mountpoint-mount.mnt) dentry.d_name.name +0" }, \ - { "pwd_s0", 
"di", "string", PWD_S(0, 0) }, \ - { "pwd_s1", "di", "string", PWD_S(0, 1) }, \ - { "pwd_s2", "di", "string", PWD_S(0, 2) }, \ - { "pwd_s3", "di", "string", PWD_S(0, 3) }, \ - { "pwd_s4", "di", "string", PWD_S(0, 4) }, \ - { "pwd_s5", "di", "string", PWD_S(0, 5) }, \ - { "pwd_s6", "di", "string", PWD_S(0, 6) }, \ - { "comm", "di", "string", "task_struct.comm" }, \ - { "uid", "di", "u32", "task_struct.cred cred.uid" }, \ - { "gid", "di", "u32", "task_struct.cred cred.gid" }, \ - { "suid", "di", "u32", "task_struct.cred cred.suid" }, \ - { "sgid", "di", "u32", "task_struct.cred cred.sgid" }, \ - { "euid", "di", "u32", "task_struct.cred cred.euid" }, \ - { "egid", "di", "u32", "task_struct.cred cred.egid" }, \ - { "pgid", "di", "u32", "KLUDGE - see kprobe_kludge_arg()" }, \ - { "sid", "di", "u32", "KLUDGE - see kprobe_kludge_arg()" }, \ - { "pid", "di", "u32", "task_struct.tgid" }, \ - { "tid", "di", "u32", "task_struct.pid" }, \ - { "exit_code", "di", "s32", "task_struct.exit_code" }, \ - { "tty_major", "di", "u32", TTY_MAJOR }, \ - { "tty_minor_start", "di", "u32", TTY_MINOR_START }, \ - { "tty_minor_index", "di", "u32", TTY_MINOR_INDEX }, \ - { NULL, NULL, NULL, NULL }} +#define TASK_SAMPLE(_r) \ + { "cap_inheritable", S(_r), "u64", "task_struct.cred cred.cap_inheritable" }, \ + { "cap_permitted", S(_r), "u64", "task_struct.cred cred.cap_permitted", }, \ + { "cap_effective", S(_r), "u64", "task_struct.cred cred.cap_effective" }, \ + { "cap_bset", S(_r), "u64", "task_struct.cred cred.cap_bset" }, \ + { "cap_ambient", S(_r), "u64", "task_struct.cred cred.cap_ambient" }, \ + { "start_boottime", S(_r), "u64", "task_struct.start_boottime" }, \ + { "tty_addr", S(_r), "u64", "task_struct.signal signal_struct.tty" }, \ + { "root_k", S(_r), "u64", "task_struct.fs fs_struct.root.dentry" }, \ + { "mnt_root_k", S(_r), "u64", "task_struct.fs fs_struct.pwd.mnt vfsmount.mnt_root" }, \ + { "mnt_mountpoint_k", S(_r), "u64", "task_struct.fs fs_struct.pwd.mnt 
(mount.mnt_mountpoint-mount.mnt)" }, \ + { "pwd_k0", S(_r), "u64", PWD_K(0, 0) }, \ + { "pwd_k1", S(_r), "u64", PWD_K(0, 1) }, \ + { "pwd_k2", S(_r), "u64", PWD_K(0, 2) }, \ + { "pwd_k3", S(_r), "u64", PWD_K(0, 3) }, \ + { "pwd_k4", S(_r), "u64", PWD_K(0, 4) }, \ + { "pwd_k5", S(_r), "u64", PWD_K(0, 5) }, \ + { "pwd_k6", S(_r), "u64", PWD_K(0, 6) }, \ + { "root_s", S(_r), "string", "task_struct.fs fs_struct.root.dentry dentry.d_name.name +0" }, \ + { "mnt_root_s", S(_r), "string", "task_struct.fs fs_struct.pwd.mnt vfsmount.mnt_root dentry.d_name.name +0" }, \ + { "mnt_mountpoint_s", S(_r), "string", "task_struct.fs fs_struct.pwd.mnt (mount.mnt_mountpoint-mount.mnt) dentry.d_name.name +0" }, \ + { "pwd_s0", S(_r), "string", PWD_S(0, 0) }, \ + { "pwd_s1", S(_r), "string", PWD_S(0, 1) }, \ + { "pwd_s2", S(_r), "string", PWD_S(0, 2) }, \ + { "pwd_s3", S(_r), "string", PWD_S(0, 3) }, \ + { "pwd_s4", S(_r), "string", PWD_S(0, 4) }, \ + { "pwd_s5", S(_r), "string", PWD_S(0, 5) }, \ + { "pwd_s6", S(_r), "string", PWD_S(0, 6) }, \ + { "comm", S(_r), "string", "task_struct.comm" }, \ + { "uid", S(_r), "u32", "task_struct.cred cred.uid" }, \ + { "gid", S(_r), "u32", "task_struct.cred cred.gid" }, \ + { "suid", S(_r), "u32", "task_struct.cred cred.suid" }, \ + { "sgid", S(_r), "u32", "task_struct.cred cred.sgid" }, \ + { "euid", S(_r), "u32", "task_struct.cred cred.euid" }, \ + { "egid", S(_r), "u32", "task_struct.cred cred.egid" }, \ + { "pgid", S(_r), "u32", "KLUDGE - see kprobe_kludge_arg()" }, \ + { "sid", S(_r), "u32", "KLUDGE - see kprobe_kludge_arg()" }, \ + { "pid", S(_r), "u32", "task_struct.tgid" }, \ + { "tid", S(_r), "u32", "task_struct.pid" }, \ + { "ppid", S(_r), "u32", "task_struct.group_leader task_struct.real_parent task_struct.tgid" }, \ + { "exit_code", S(_r), "s32", "task_struct.exit_code" }, \ + { "tty_major", S(_r), "u32", "task_struct.signal signal_struct.tty tty_struct.driver tty_driver.major" }, \ + { "tty_minor_start", S(_r), "u32", 
"task_struct.signal signal_struct.tty tty_struct.driver tty_driver.minor_start" }, \ + { "tty_minor_index", S(_r), "u32", "task_struct.signal signal_struct.tty tty_struct.index" } struct kprobe kp_wake_up_new_task = { "quark_wake_up_new_task", "wake_up_new_task", WAKE_UP_NEW_TASK_SAMPLE, 0, - TASK_SAMPLE + { + TASK_SAMPLE(di), + { NULL, NULL, NULL, NULL }, + } }; struct kprobe kp_exit_thread = { @@ -97,7 +97,10 @@ struct kprobe kp_exit_thread = { "exit_thread", EXIT_THREAD_SAMPLE, 0, - TASK_SAMPLE + { + TASK_SAMPLE(di), + { NULL, NULL, NULL, NULL }, + } }; struct kprobe kp_exec_connector = { @@ -106,6 +109,7 @@ struct kprobe kp_exec_connector = { EXEC_CONNECTOR_SAMPLE, 0, { + TASK_SAMPLE(di), { "argc", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +0" }, { "stack_0", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +0" }, { "stack_1", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +8" }, @@ -167,56 +171,9 @@ struct kprobe kp_exec_connector = { { "stack_57", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +464" }, { "stack_58", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +472" }, { "stack_59", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +480" }, - { "stack_60", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +488" }, - { "stack_61", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +496" }, - { "stack_62", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +504" }, - { "stack_63", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +512" }, - { "stack_64", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +520" }, - { "stack_65", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +528" }, - { "stack_66", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +536" }, - { "stack_67", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +544" }, - { "stack_68", "di", "u64", "task_struct.mm 
mm_struct.(anon).start_stack +8 +552" }, - { "stack_69", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +560" }, - { "stack_70", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +568" }, - { "stack_71", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +576" }, - { "stack_72", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +584" }, - { "stack_73", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +592" }, - { "stack_74", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +600" }, - { "stack_75", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +608" }, - { "stack_76", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +616" }, - { "stack_77", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +624" }, - { "stack_78", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +632" }, - { "stack_79", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +640" }, - { "stack_80", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +648" }, - { "stack_81", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +656" }, - { "stack_82", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +664" }, - { "stack_83", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +672" }, - { "stack_84", "di", "u64", "task_struct.mm mm_struct.(anon).start_stack +8 +680" }, - { "cap_inheritable", "di", "u64", "task_struct.cred cred.cap_inheritable" }, - { "cap_permitted", "di", "u64", "task_struct.cred cred.cap_permitted", }, - { "cap_effective", "di", "u64", "task_struct.cred cred.cap_effective" }, - { "cap_bset", "di", "u64", "task_struct.cred cred.cap_bset" }, - { "cap_ambient", "di", "u64", "task_struct.cred cred.cap_ambient" }, - { "start_boottime", "di", "u64", "task_struct.start_boottime" }, - { "tty_addr", "di", "u64", "task_struct.signal signal_struct.tty" }, - { "comm", "di", "string", "task_struct.comm" }, - { "uid", "di", "u32", 
"task_struct.cred cred.uid" }, - { "gid", "di", "u32", "task_struct.cred cred.gid" }, - { "suid", "di", "u32", "task_struct.cred cred.suid" }, - { "sgid", "di", "u32", "task_struct.cred cred.sgid" }, - { "euid", "di", "u32", "task_struct.cred cred.euid" }, - { "egid", "di", "u32", "task_struct.cred cred.egid" }, - { "pgid", "di", "u32", "task_struct.group_leader task_struct.signal (signal_struct.pids+16) (pid.numbers+0).upid.nr" }, \ - { "sid", "di", "u32", "task_struct.group_leader task_struct.signal (signal_struct.pids+24) (pid.numbers+0).upid.nr" }, \ - { "tty_major", "di", "u32", TTY_MAJOR }, - { "tty_minor_start", "di", "u32", TTY_MINOR_START }, - { "tty_minor_index", "di", "u32", TTY_MINOR_INDEX }, { NULL, NULL, NULL, NULL }, }}; -#undef TTY_MINOR_INDEX -#undef TTY_MINOR_START -#undef TTY_MAJOR #undef PWD_S #undef PWD_K #undef XS diff --git a/kprobe_queue.c b/kprobe_queue.c index a111f94..011c0b3 100644 --- a/kprobe_queue.c +++ b/kprobe_queue.c @@ -118,6 +118,12 @@ struct perf_group_leader { struct perf_mmap mmap; }; +/* + * Forbid padding on samples/wire structures + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wpadded" + struct exec_sample { struct perf_sample_data_loc filename; s32 pid; @@ -157,6 +163,7 @@ struct task_sample { u32 sid; u32 pid; u32 tid; + u32 ppid; s32 exit_code; u32 tty_major; u32 tty_minor_start; @@ -166,34 +173,18 @@ struct task_sample { }; struct exec_connector_sample { + struct task_sample task_sample; /* must be 8 byte aligned */ /* 64bit */ - u64 probe_ip; u64 argc; - u64 stack[85]; /* sync with kprobe_defs */ - u64 cap_inheritable; - u64 cap_permitted; - u64 cap_effective; - u64 cap_bset; - u64 cap_ambient; - u64 start_boottime; - u64 tty_addr; - /* 32bit */ - struct perf_sample_data_loc comm; - u32 uid; - u32 gid; - u32 suid; - u32 sgid; - u32 euid; - u32 egid; - u32 pgid; - u32 sid; - u32 tty_major; - u32 tty_minor_start; - u32 tty_minor_index; - /* 16bit */ - /* 8bit */ + u64 stack[60]; /* sync with 
kprobe_defs */ }; +#pragma GCC diagnostic pop + +/* + * End samples/wire/ structures + */ + struct kprobe_state { TAILQ_ENTRY(kprobe_state) entry; struct kprobe *k; @@ -353,6 +344,64 @@ qstr_copy_data_loc(struct qstr *qstr, return (data_loc->size); } +static void +task_sample_to_raw_task(struct kprobe_queue *kqq, int kind, + struct perf_record_sample *sample, struct raw_task *task) +{ + struct task_sample *w = sample_data_body(kqq, sample); + struct path_ctx pctx; + int i; + + task->cap_inheritable = w->cap_inheritable; + task->cap_permitted = w->cap_permitted; + task->cap_effective = w->cap_effective; + task->cap_bset = w->cap_bset; + task->cap_ambient = w->cap_ambient; + task->start_boottime = w->start_boottime; + task->uid = w->uid; + task->gid = w->gid; + task->suid = w->suid; + task->sgid = w->sgid; + task->euid = w->euid; + task->egid = w->egid; + task->pgid = w->pgid; + task->sid = w->sid; + task->ppid = w->ppid; + if (w->tty_addr) { + task->tty_major = w->tty_major; + task->tty_minor = w->tty_minor_start + w->tty_minor_index; + } + /* cwd below */ + strlcpy(task->comm, str_of_dataloc(sample, &w->comm), + sizeof(task->comm)); + if (kind == EXIT_THREAD_SAMPLE) { + task->exit_code = (w->exit_code >> 8) & 0xff; + task->exit_time_event = sample->sample_id.time; + qstr_strcpy(&task->cwd, "(exited)"); + /* No cwd on exit */ + return; + } + + task->exit_code = -1; + task->exit_time_event = 0; + + /* Consider moving all this inside build_path() */ + pctx.root = str_of_dataloc(sample, &w->root_s); + pctx.root_k = w->root_k; + pctx.mnt_root = str_of_dataloc(sample, &w->mnt_root_s); + pctx.mnt_root_k = w->mnt_root_k; + pctx.mnt_mountpoint = str_of_dataloc(sample, + &w->mnt_mountpoint_s); + pctx.mnt_mountpoint_k = w->mnt_mountpoint_k; + for (i = 0; i < (int)nitems(pctx.pwd); i++) { + pctx.pwd[i].pwd = str_of_dataloc(sample, + &w->pwd_s[i]); + pctx.pwd[i].pwd_k = w->pwd_k[i]; + } + if (build_path(&pctx, &task->cwd) == -1) + warn("can't build path"); +} + static struct 
raw_event * perf_sample_to_raw(struct quark_queue *qq, struct perf_record_sample *sample) { @@ -377,8 +426,7 @@ perf_sample_to_raw(struct quark_queue *qq, struct perf_record_sample *sample) case WAKE_UP_NEW_TASK_SAMPLE: /* FALLTHROUGH */ case EXIT_THREAD_SAMPLE: { struct task_sample *w = sample_data_body(kqq, sample); - struct path_ctx pctx; - int i; + int raw_type; /* * ev->sample.sample_id.pid is the parent, if the new task has * the same pid as it, then this is a thread event @@ -386,66 +434,18 @@ perf_sample_to_raw(struct quark_queue *qq, struct perf_record_sample *sample) if ((qq->flags & QQ_THREAD_EVENTS) == 0 && w->pid != w->tid) return (NULL); - if (kind == WAKE_UP_NEW_TASK_SAMPLE) { - if ((raw = raw_event_alloc(RAW_WAKE_UP_NEW_TASK)) == NULL) - return (NULL); - /* - * Cheat, make this look like a child event. - */ + raw_type = kind == WAKE_UP_NEW_TASK_SAMPLE ? + RAW_WAKE_UP_NEW_TASK : RAW_EXIT_THREAD; + if ((raw = raw_event_alloc(raw_type)) == NULL) + return (NULL); + /* + * Cheat, make it look like a child event + */ + if (raw_type == RAW_WAKE_UP_NEW_TASK) { raw->pid = w->pid; raw->tid = w->tid; - raw->task.ppid = sample->sample_id.pid; - pctx.root = str_of_dataloc(sample, &w->root_s); - pctx.root_k = w->root_k; - pctx.mnt_root = str_of_dataloc(sample, &w->mnt_root_s); - pctx.mnt_root_k = w->mnt_root_k; - pctx.mnt_mountpoint = str_of_dataloc(sample, - &w->mnt_mountpoint_s); - pctx.mnt_mountpoint_k = w->mnt_mountpoint_k; - for (i = 0; i < (int)nitems(pctx.pwd); i++) { - pctx.pwd[i].pwd = str_of_dataloc(sample, - &w->pwd_s[i]); - pctx.pwd[i].pwd_k = w->pwd_k[i]; - } - if (build_path(&pctx, &raw->task.cwd) == -1) - warn("can't build path"); - raw->task.exit_code = -1; - raw->task.exit_time_event = 0; - if (w->tty_addr) { - raw->task.tty_major = w->tty_major; - raw->task.tty_minor = w->tty_minor_start + - w->tty_minor_index; - } - } else { - if ((raw = raw_event_alloc(RAW_EXIT_THREAD)) == NULL) - return (NULL); - /* - * We derive ppid from the incoming sample 
header as - * it's originally an event of the parent, since exit is - * originally an event of the child, we don't have - * access to ppid. - */ - raw->task.ppid = -1; - raw->task.exit_code = (w->exit_code >> 8) & 0xff; - raw->task.exit_time_event = sample->sample_id.time; } - strlcpy(raw->task.comm, str_of_dataloc(sample, &w->comm), - sizeof(raw->task.comm)); - raw->task.cap_inheritable = w->cap_inheritable; - raw->task.cap_permitted = w->cap_permitted; - raw->task.cap_effective = w->cap_effective; - raw->task.cap_bset = w->cap_bset; - raw->task.cap_ambient = w->cap_ambient; - raw->task.start_boottime = w->start_boottime; - raw->task.uid = w->uid; - raw->task.gid = w->gid; - raw->task.suid = w->suid; - raw->task.sgid = w->sgid; - raw->task.euid = w->euid; - raw->task.egid = w->egid; - raw->task.pgid = w->pgid; - raw->task.sid = w->sid; - + task_sample_to_raw_task(kqq, kind, sample, &raw->task); break; } case EXEC_CONNECTOR_SAMPLE: { @@ -473,27 +473,7 @@ perf_sample_to_raw(struct quark_queue *qq, struct perf_record_sample *sample) warnx("can't copy args"); exec->args.p[exec->args_len - 1] = 0; } - exec->cap_inheritable = exec_sample->cap_inheritable; - exec->cap_permitted = exec_sample->cap_permitted; - exec->cap_effective = exec_sample->cap_effective; - exec->cap_bset = exec_sample->cap_bset; - exec->cap_ambient = exec_sample->cap_ambient; - exec->start_boottime = exec_sample->start_boottime; - exec->uid = exec_sample->uid; - exec->gid = exec_sample->gid; - exec->suid = exec_sample->suid; - exec->sgid = exec_sample->sgid; - exec->euid = exec_sample->euid; - exec->egid = exec_sample->egid; - exec->pgid = exec_sample->pgid; - exec->sid = exec_sample->sid; - if (exec_sample->tty_addr) { - exec->tty_major = exec_sample->tty_major; - exec->tty_minor = (exec_sample->tty_minor_start + - exec_sample->tty_minor_index); - } - strlcpy(exec->comm, str_of_dataloc(sample, &exec_sample->comm), - sizeof(exec->comm)); + task_sample_to_raw_task(kqq, kind, sample, &exec->task); 
break; } default: @@ -860,7 +840,9 @@ kprobe_kludge_arg(struct kprobe *k, struct kprobe_arg *karg, * within task_struct. So if signal_struct.pids exists, it's the "new" * version. */ - if ((k == &kp_wake_up_new_task || k == &kp_exit_thread) && + if ((k == &kp_wake_up_new_task || + k == &kp_exit_thread || + k == &kp_exec_connector) && !strcmp(karg->name, "pgid")) { if (quark_btf_offset(qbtf, "signal_struct.pids") == -1) return (&ka_task_old_pgid); @@ -868,7 +850,9 @@ kprobe_kludge_arg(struct kprobe *k, struct kprobe_arg *karg, return (&ka_task_new_pgid); } - if ((k == &kp_wake_up_new_task || k == &kp_exit_thread) && + if ((k == &kp_wake_up_new_task || + k == &kp_exit_thread || + k == &kp_exec_connector) && !strcmp(karg->name, "sid")) { if (quark_btf_offset(qbtf, "signal_struct.pids") == -1) return (&ka_task_old_sid); diff --git a/quark.c b/quark.c index 9fdac60..89cc351 100644 --- a/quark.c +++ b/quark.c @@ -69,6 +69,7 @@ raw_event_alloc(int type) break; case RAW_EXEC_CONNECTOR: qstr_init(&raw->exec_connector.args); + qstr_init(&raw->exec_connector.task.cwd); break; case RAW_COMM: /* nada */ break; @@ -396,6 +397,8 @@ event_cache_inherit(struct quark_queue *qq, struct quark_event *qev, int ppid) if ((parent = event_cache_get(qq, ppid, 0)) == NULL) return; + /* Ignore QUARK_F_PROC? 
as we always have it all on fork */ + if (parent->flags & QUARK_F_COMM) { qev->flags |= QUARK_F_COMM; strlcpy(qev->comm, parent->comm, sizeof(qev->comm)); @@ -873,11 +876,11 @@ quark_event_dump(struct quark_event *qev, FILE *f) #undef P static int -raw_event_to_quark_event(struct quark_queue *qq, struct raw_event *raw, struct quark_event *dst) +raw_event_to_quark_event(struct quark_queue *qq, struct raw_event *src, struct quark_event *dst) { struct quark_event *qev; struct raw_event *agg; - struct raw_task *raw_task, *raw_exit; + struct raw_task *raw_fork, *raw_exit, *raw_task; struct raw_comm *raw_comm; struct raw_exec *raw_exec; struct raw_exec_connector *raw_exec_connector; @@ -888,8 +891,9 @@ raw_event_to_quark_event(struct quark_queue *qq, struct raw_event *raw, struct q int do_cache; u64 events; - raw_task = NULL; + raw_fork = NULL; raw_exit = NULL; + raw_task = NULL; raw_comm = NULL; raw_exec = NULL; raw_exec_connector = NULL; @@ -901,47 +905,47 @@ raw_event_to_quark_event(struct quark_queue *qq, struct raw_event *raw, struct q if (do_cache) { /* XXX pass if this is a fork down, so we can evict the old one XXX */ - qev = event_cache_get(qq, raw->pid, 1); + qev = event_cache_get(qq, src->pid, 1); if (qev == NULL) return (-1); } else { qev = dst; - qev->pid = raw->pid; + qev->pid = src->pid; qev->flags = 0; } events = 0; - switch (raw->type) { + switch (src->type) { case RAW_WAKE_UP_NEW_TASK: events |= QUARK_EV_FORK; - raw_task = &raw->task; + raw_fork = &src->task; break; case RAW_EXEC: events |= QUARK_EV_EXEC; - raw_exec = &raw->exec; + raw_exec = &src->exec; break; case RAW_EXIT_THREAD: events |= QUARK_EV_EXIT; - raw_exit = &raw->task; + raw_exit = &src->task; break; case RAW_COMM: events |= QUARK_EV_SETPROCTITLE; - raw_comm = &raw->comm; + raw_comm = &src->comm; break; case RAW_EXEC_CONNECTOR: events |= QUARK_EV_EXEC; - raw_exec_connector = &raw->exec_connector; + raw_exec_connector = &src->exec_connector; break; default: return (errno = EINVAL, -1); 
break; /* NOTREACHED */ }; - TAILQ_FOREACH(agg, &raw->agg_queue, agg_entry) { + TAILQ_FOREACH(agg, &src->agg_queue, agg_entry) { switch (agg->type) { case RAW_WAKE_UP_NEW_TASK: - raw_task = &agg->task; + raw_fork = &agg->task; events |= QUARK_EV_FORK; break; case RAW_EXEC: @@ -966,12 +970,41 @@ raw_event_to_quark_event(struct quark_queue *qq, struct raw_event *raw, struct q } /* QUARK_F_PROC */ + if (raw_fork != NULL) { + event_cache_inherit(qq, qev, raw_fork->ppid); + raw_task = raw_fork; + cwd = raw_task->cwd.p; + } + if (raw_exit != NULL) { + qev->flags |= QUARK_F_EXIT; + + qev->exit_code = raw_exit->exit_code; + if (raw_exit->exit_time_event) + qev->exit_time_event = quark.boottime + raw_exit->exit_time_event; + raw_task = raw_exit; + /* cwd is invalid, don't collect */ + /* NOTE: maybe there are more things we _don't_ want from exit */ + } + if (raw_exec != NULL) { + qev->flags |= QUARK_F_FILENAME; + + strlcpy(qev->filename, raw_exec->filename.p, sizeof(qev->filename)); + if (raw_exec->flags & RAW_EXEC_F_EXT) { + args = raw_exec->ext.args.p; + args_len = raw_exec->ext.args_len; + raw_task = &raw_exec->ext.task; + cwd = raw_task->cwd.p; + } + } + if (raw_exec_connector != NULL) { + args = raw_exec_connector->args.p; + args_len = raw_exec_connector->args_len; + raw_task = &raw_exec_connector->task; + cwd = raw_task->cwd.p; + } if (raw_task != NULL) { qev->flags |= QUARK_F_PROC; - if (events & QUARK_EV_FORK) - event_cache_inherit(qq, qev, raw_task->ppid); - qev->proc_cap_inheritable = raw_task->cap_inheritable; qev->proc_cap_permitted = raw_task->cap_permitted; qev->proc_cap_effective = raw_task->cap_effective; @@ -990,57 +1023,9 @@ raw_event_to_quark_event(struct quark_queue *qq, struct raw_event *raw, struct q qev->proc_tty_major = raw_task->tty_major; qev->proc_tty_minor = raw_task->tty_minor; - cwd = raw_task->cwd.p; + /* Don't set cwd as it's not valid on exit */ comm = raw_task->comm; } - if (raw_exit != NULL) { - qev->flags |= QUARK_F_EXIT; - - 
qev->exit_code = raw_exit->exit_code; - if (raw_exit->exit_time_event) - qev->exit_time_event = quark.boottime + raw_exit->exit_time_event; - /* XXX consider updating task values since we have them here XXX */ - } - if (raw_exec != NULL) { - qev->flags |= QUARK_F_FILENAME; - - strlcpy(qev->filename, raw_exec->filename.p, sizeof(qev->filename)); - if (raw_exec->flags & RAW_EXEC_F_EXT) { - args = raw_exec->ext.args.p; - args_len = raw_exec->ext.args_len; - cwd = raw_exec->ext.task.cwd.p; - comm = raw_exec->ext.comm; - qev->proc_pgid = raw_exec->ext.task.pgid; - qev->proc_sid = raw_exec->ext.task.sid; - qev->proc_tty_major = raw_exec->ext.task.tty_major; - qev->proc_tty_minor = raw_exec->ext.task.tty_minor; - } - } - if (raw_exec_connector != NULL) { - qev->flags |= QUARK_F_PROC; - - comm = raw_exec_connector->comm; - args = raw_exec_connector->args.p; - args_len = raw_exec_connector->args_len; - qev->proc_cap_inheritable = raw_exec_connector->cap_inheritable; - qev->proc_cap_permitted = raw_exec_connector->cap_permitted; - qev->proc_cap_effective = raw_exec_connector->cap_effective; - qev->proc_cap_bset = raw_exec_connector->cap_bset; - qev->proc_cap_ambient = raw_exec_connector->cap_ambient; - qev->proc_time_boot = quark.boottime + raw_exec_connector->start_boottime; - /* XXX No ppid for now, see how raw_task gets it */ - /* qev->proc_ppid = raw_exec_connector->ppid; */ - qev->proc_uid = raw_exec_connector->uid; - qev->proc_gid = raw_exec_connector->gid; - qev->proc_suid = raw_exec_connector->suid; - qev->proc_sgid = raw_exec_connector->sgid; - qev->proc_euid = raw_exec_connector->euid; - qev->proc_egid = raw_exec_connector->egid; - qev->proc_pgid = raw_exec_connector->pgid; - qev->proc_sid = raw_exec_connector->sid; - qev->proc_tty_major = raw_exec_connector->tty_major; - qev->proc_tty_minor = raw_exec_connector->tty_minor; - } if (raw_comm != NULL) comm = raw_comm->comm; /* raw_comm always overrides */ /* diff --git a/quark.h b/quark.h index 03d3a09..73cf23a 
100644 --- a/quark.h +++ b/quark.h @@ -134,9 +134,9 @@ struct raw_task { u32 egid; u32 pgid; u32 sid; - u32 ppid; /* Unavailable at exit */ - s32 exit_code; /* Unavailable at fork */ - u64 exit_time_event; /* Unavailable at fork */ + u32 ppid; + s32 exit_code; /* only available at exit */ + u64 exit_time_event; /* only available at exit */ u32 tty_major; u32 tty_minor; struct qstr cwd; @@ -150,34 +150,16 @@ struct raw_exec { /* available if RAW_EXEC_F_EXT */ struct { - struct raw_task task; struct qstr args; size_t args_len; - char comm[16]; /* XXX move me inside task */ + struct raw_task task; } ext; }; struct raw_exec_connector { struct qstr args; size_t args_len; - u64 cap_inheritable; - u64 cap_permitted; - u64 cap_effective; - u64 cap_bset; - u64 cap_ambient; - u64 start_boottime; - /* XXX missing ppid see how raw_task derives it */ - u32 uid; - u32 gid; - u32 suid; - u32 sgid; - u32 euid; - u32 egid; - u32 pgid; - u32 sid; - u32 tty_major; - u32 tty_minor; - char comm[16]; + struct raw_task task; }; struct raw_event {