diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a18e650..10d2e9f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ jobs: cargo install --path crates/memtrack --locked echo "CODSPEED_MEMTRACK_BINARY=$(which codspeed-memtrack)" >> $GITHUB_ENV - - run: cargo test --all --exclude memtrack + - run: cargo test --all --exclude memtrack exectrack bpf-tests: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 2b19d5e6..29264ded 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -346,6 +346,20 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +[[package]] +name = "codspeed-bpf" +version = "0.1.0" +dependencies = [ + "anyhow", + "bindgen", + "libbpf-cargo", + "libbpf-rs", + "libc", + "log", + "paste", + "vmlinux", +] + [[package]] name = "codspeed-runner" version = "4.4.1" @@ -652,6 +666,24 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "exectrack" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "codspeed-bpf", + "env_logger", + "libbpf-rs", + "libc", + "log", + "runner-shared", + "serde", + "serde_json", + "static_assertions", + "test-log", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -1529,6 +1561,7 @@ dependencies = [ "anyhow", "bindgen", "clap", + "codspeed-bpf", "env_logger", "ipc-channel", "itertools 0.14.0", diff --git a/Cargo.toml b/Cargo.toml index 306f51dc..89769d20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,7 +80,7 @@ rstest_reuse = "0.7.0" shell-quote = "0.7.2" [workspace] -members = ["crates/runner-shared", "crates/memtrack"] +members = ["crates/runner-shared", "crates/memtrack", "crates/codspeed-bpf", "crates/exectrack"] [workspace.dependencies] anyhow = "1.0" diff --git a/crates/codspeed-bpf/Cargo.toml b/crates/codspeed-bpf/Cargo.toml new file mode 100644 index 00000000..922c9fae --- /dev/null +++ 
b/crates/codspeed-bpf/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "codspeed-bpf" +version = "0.1.0" +edition = "2024" +repository = "https://github.com/CodSpeedHQ/runner" + +[features] +# Enable build utilities for BPF compilation +build = ["dep:libbpf-cargo", "dep:vmlinux", "dep:bindgen"] + +[dependencies] +anyhow = { workspace = true } +libbpf-rs = { version = "0.25.0", features = ["vendored"] } +libc = { workspace = true } +log = { workspace = true } +paste = "1.0" + +# Build dependencies (optional, only needed for build feature) +libbpf-cargo = { version = "0.25.0", optional = true } +vmlinux = { git = "https://github.com/libbpf/vmlinux.h.git", rev = "991dd4b8dfd8c9d62ce8999521b24f61d9b7fc52", optional = true } +bindgen = { version = "0.71", optional = true } diff --git a/crates/codspeed-bpf/src/build.rs b/crates/codspeed-bpf/src/build.rs new file mode 100644 index 00000000..98380bd8 --- /dev/null +++ b/crates/codspeed-bpf/src/build.rs @@ -0,0 +1,80 @@ +//! Build utilities for BPF programs using codspeed-bpf +//! +//! This module provides helpers for building BPF programs that use codspeed-bpf headers. +//! +//! To use this in your build.rs, add codspeed-bpf with the "build" feature to your +//! [build-dependencies]: +//! +//! ```toml +//! [build-dependencies] +//! codspeed-bpf = { path = "../codspeed-bpf", features = ["build"] } +//! ``` +//! +//! Then in your build.rs: +//! +//! ```ignore +//! fn main() { +//! codspeed_bpf::build::build_bpf("exectrack", "src/ebpf/c/exectrack.bpf.c"); +//! codspeed_bpf::build::generate_bindings("wrapper.h"); +//! } +//! ``` + +use std::{env, path::PathBuf}; + +/// Build a BPF program with codspeed-bpf headers available +/// +/// # Arguments +/// * `program_name` - The name of the BPF program (e.g., "exectrack", "memtrack") +/// * `source_file` - The path to the BPF source file (e.g., "src/ebpf/c/exectrack.bpf.c") +/// +/// This function will: +/// 1. Compile the BPF program using libbpf-cargo +/// 2. 
Include codspeed-bpf headers in the clang search path +/// 3. Generate a skeleton into OUT_DIR/{program_name}.skel.rs +pub fn build_bpf(program_name: &str, source_file: &str) { + use libbpf_cargo::SkeletonBuilder; + + println!("cargo:rerun-if-changed=src/ebpf/c"); + + let arch = env::var("CARGO_CFG_TARGET_ARCH") + .expect("CARGO_CFG_TARGET_ARCH must be set in build script"); + + let output = + PathBuf::from(env::var("OUT_DIR").unwrap()).join(format!("{program_name}.skel.rs")); + + // Get the path to codspeed-bpf's C headers + let codspeed_bpf_include = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()) + .parent() + .unwrap() + .join("codspeed-bpf/src/c"); + + SkeletonBuilder::new() + .source(source_file) + .clang_args([ + "-I", + &vmlinux::include_path_root().join(arch).to_string_lossy(), + "-I", + &codspeed_bpf_include.to_string_lossy(), + ]) + .build_and_generate(&output) + .unwrap_or_else(|_| panic!("Failed to build {program_name}.bpf.c")); +} + +/// Generate Rust bindings for a C header file +/// +/// # Arguments +/// * `header_file` - The path to the header file (e.g., "wrapper.h") +/// +/// This function will: +/// 1. Use bindgen to generate Rust bindings +/// 2. 
Write the output to OUT_DIR/event.rs +pub fn generate_bindings(header_file: &str) { + let bindings = bindgen::Builder::default() + .header(header_file) + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .generate() + .expect("Unable to generate bindings"); + + let out_file = PathBuf::from(env::var("OUT_DIR").unwrap()).join("event.rs"); + std::fs::write(&out_file, bindings.to_string()).expect("Couldn't write bindings!"); +} diff --git a/crates/codspeed-bpf/src/c/codspeed/common.h b/crates/codspeed-bpf/src/c/codspeed/common.h new file mode 100644 index 00000000..0fcedb65 --- /dev/null +++ b/crates/codspeed-bpf/src/c/codspeed/common.h @@ -0,0 +1,27 @@ +#ifndef __CODSPEED_COMMON_H__ +#define __CODSPEED_COMMON_H__ + +/* Macros for common map definitions */ +#define BPF_HASH_MAP(name, key_type, value_type, max_ents) \ + struct { \ + __uint(type, BPF_MAP_TYPE_HASH); \ + __uint(max_entries, max_ents); \ + __type(key, key_type); \ + __type(value, value_type); \ + } name SEC(".maps") + +#define BPF_ARRAY_MAP(name, value_type, max_ents) \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __uint(max_entries, max_ents); \ + __type(key, __u32); \ + __type(value, value_type); \ + } name SEC(".maps") + +#define BPF_RINGBUF(name, size) \ + struct { \ + __uint(type, BPF_MAP_TYPE_RINGBUF); \ + __uint(max_entries, size); \ + } name SEC(".maps") + +#endif /* __CODSPEED_COMMON_H__ */ diff --git a/crates/codspeed-bpf/src/c/codspeed/process_tracking.h b/crates/codspeed-bpf/src/c/codspeed/process_tracking.h new file mode 100644 index 00000000..003985af --- /dev/null +++ b/crates/codspeed-bpf/src/c/codspeed/process_tracking.h @@ -0,0 +1,52 @@ +#ifndef __CODSPEED_PROCESS_TRACKING_H__ +#define __CODSPEED_PROCESS_TRACKING_H__ + +#include "common.h" + +/* Standard process tracking maps - define these in your BPF program + * This macro creates the map definitions. The helper functions below will + * reference these maps, so call this macro before including this header's helpers. 
/*
 * PROCESS_TRACKING_MAPS() declares the two process-tracking hash maps AND
 * defines the inline helpers that operate on them. Because the helpers are
 * emitted by the macro itself, expand it once at file scope in the BPF program
 * before the first use of is_tracked / handle_fork / handle_exit.
 *
 * Maps (10000 entries each):
 *   tracked_pids: __u32 pid -> __u8 marker; presence of a key means "tracked".
 *   pids_ppid:    __u32 pid -> __u32 parent pid; used to walk up the ancestry.
 *
 * Helpers:
 *   is_tracked(pid)            1 if pid, or one of up to 5 ancestors reachable
 *                              through pids_ppid, is in tracked_pids; else 0.
 *   handle_fork(parent, child) if the parent is tracked, marks the child as
 *                              tracked, records child -> parent and returns 1;
 *                              otherwise returns 0 and touches nothing.
 *   handle_exit(pid)           removes pid from both maps; always returns 0.
 */
#define PROCESS_TRACKING_MAPS() \
    BPF_HASH_MAP(tracked_pids, __u32, __u8, 10000); \
    BPF_HASH_MAP(pids_ppid, __u32, __u32, 10000); \
    /* Forward declarations of the helpers defined below */ \
    static __always_inline int is_tracked(__u32 pid); \
    static __always_inline int handle_fork(__u32 parent_pid, __u32 child_pid); \
    static __always_inline int handle_exit(__u32 pid); \
    /* is_tracked: direct hit first, then a bounded walk up the parent chain */ \
    static __always_inline int is_tracked(__u32 pid) { \
        if (bpf_map_lookup_elem(&tracked_pids, &pid)) { \
            return 1; \
        } \
        /* Fixed trip count (5) with an unroll request keeps the loop bounded */ \
        _Pragma("unroll") \
        for (int i = 0; i < 5; i++) { \
            __u32* ppid = bpf_map_lookup_elem(&pids_ppid, &pid); \
            if (!ppid) { \
                break; \
            } \
            pid = *ppid; \
            if (bpf_map_lookup_elem(&tracked_pids, &pid)) { \
                return 1; \
            } \
        } \
        return 0; \
    } \
    /* handle_fork: propagate tracking from a tracked parent to its child */ \
    static __always_inline int handle_fork(__u32 parent_pid, __u32 child_pid) { \
        if (is_tracked(parent_pid)) { \
            __u8 marker = 1; \
            bpf_map_update_elem(&tracked_pids, &child_pid, &marker, BPF_ANY); \
            bpf_map_update_elem(&pids_ppid, &child_pid, &parent_pid, BPF_ANY); \
            return 1; \
        } \
        return 0; \
    } \
    /* handle_exit: forget a pid in both maps (return values are ignored) */ \
    static __always_inline int handle_exit(__u32 pid) { \
        bpf_map_delete_elem(&tracked_pids, &pid); \
        bpf_map_delete_elem(&pids_ppid, &pid); \
        return 0; \
    }
- Build utilities for BPF programs + +pub mod macros; +pub mod poller; +pub mod process_tracking; + +#[cfg(feature = "build")] +pub mod build; + +pub use poller::{EventHandler, RingBufferPoller}; +pub use process_tracking::{ProcessTracking, bump_memlock_rlimit}; diff --git a/crates/codspeed-bpf/src/macros.rs b/crates/codspeed-bpf/src/macros.rs new file mode 100644 index 00000000..cbbab0db --- /dev/null +++ b/crates/codspeed-bpf/src/macros.rs @@ -0,0 +1,132 @@ +//! Macros for attaching uprobes and tracepoints + +/// Generate methods to attach uprobe/uretprobe pairs +#[macro_export] +macro_rules! attach_uprobe_uretprobe { + ($name:ident, $prog_entry:ident, $prog_return:ident, $func_str:expr) => { + fn $name(&mut self, target_path: &std::path::Path) -> anyhow::Result<()> { + use anyhow::Context; + use libbpf_rs::UprobeOpts; + + let link = self + .skel + .progs + .$prog_entry + .attach_uprobe_with_opts( + -1, + target_path, + 0, + UprobeOpts { + func_name: Some($func_str.to_string()), + retprobe: false, + ..Default::default() + }, + ) + .context(format!( + "Failed to attach {} uprobe in {}", + $func_str, + target_path.display() + ))?; + self.probes.push(link); + + let link = self + .skel + .progs + .$prog_return + .attach_uprobe_with_opts( + -1, + target_path, + 0, + UprobeOpts { + func_name: Some($func_str.to_string()), + retprobe: true, + ..Default::default() + }, + ) + .context(format!( + "Failed to attach {} uretprobe in {}", + $func_str, + target_path.display() + ))?; + self.probes.push(link); + + Ok(()) + } + }; + ($name:ident) => { + paste::paste! { + $crate::attach_uprobe_uretprobe!( + [], + [], + [], + stringify!($name) + ); + } + }; +} + +/// Generate methods to attach single uprobes (entry only) +#[macro_export] +macro_rules! 
attach_uprobe { + ($name:ident, $prog:ident, $func_str:expr) => { + fn $name(&mut self, target_path: &std::path::Path) -> anyhow::Result<()> { + use anyhow::Context; + use libbpf_rs::UprobeOpts; + + let link = self + .skel + .progs + .$prog + .attach_uprobe_with_opts( + -1, + target_path, + 0, + UprobeOpts { + func_name: Some($func_str.to_string()), + retprobe: false, + ..Default::default() + }, + ) + .context(format!( + "Failed to attach {} uprobe in {}", + $func_str, + target_path.display() + ))?; + self.probes.push(link); + Ok(()) + } + }; + ($name:ident) => { + paste::paste! { + $crate::attach_uprobe!( + [], + [], + stringify!($name) + ); + } + }; +} + +/// Generate methods to attach tracepoints +#[macro_export] +macro_rules! attach_tracepoint { + ($func:ident, $prog:ident) => { + fn $func(&mut self) -> anyhow::Result<()> { + use anyhow::Context; + + let link = self + .skel + .progs + .$prog + .attach() + .context(format!("Failed to attach {} tracepoint", stringify!($prog)))?; + self.probes.push(link); + Ok(()) + } + }; + ($name:ident) => { + paste::paste! 
{ + $crate::attach_tracepoint!([], []); + } + }; +} diff --git a/crates/memtrack/src/ebpf/poller.rs b/crates/codspeed-bpf/src/poller.rs similarity index 83% rename from crates/memtrack/src/ebpf/poller.rs rename to crates/codspeed-bpf/src/poller.rs index 05884773..421969aa 100644 --- a/crates/memtrack/src/ebpf/poller.rs +++ b/crates/codspeed-bpf/src/poller.rs @@ -5,10 +5,8 @@ use std::sync::{Arc, mpsc}; use std::thread::JoinHandle; use std::time::Duration; -use super::events::Event; - /// A handler function for processing ring buffer events -pub type EventHandler = Box; +pub type EventHandler = Box; /// RingBufferPoller manages polling a BPF ring buffer in a background thread /// and sending events to handlers @@ -20,19 +18,23 @@ pub struct RingBufferPoller { impl RingBufferPoller { /// Create a new RingBufferPoller for the given ring buffer map /// + /// # Type Parameters + /// * `T` - The event type to deserialize from the ring buffer + /// * `M` - The map type implementing MapCore + /// /// # Arguments /// * `rb_map` - The BPF ring buffer map to poll /// * `handler` - Callback function to handle each event /// * `poll_timeout_ms` - How long to wait for events in each poll iteration - pub fn new( + pub fn new( rb_map: &M, - handler: EventHandler, + handler: EventHandler, poll_timeout_ms: u64, ) -> Result { let mut builder = RingBufferBuilder::new(); builder.add(rb_map, move |data| { - if data.len() >= std::mem::size_of::() { - let event = unsafe { &*(data.as_ptr() as *const Event) }; + if data.len() >= std::mem::size_of::() { + let event = unsafe { &*(data.as_ptr() as *const T) }; handler(*event); } 0 @@ -57,10 +59,10 @@ impl RingBufferPoller { /// Create a new RingBufferPoller with an mpsc channel for events /// /// Returns the RingBufferPoller and the receiver end of the channel - pub fn with_channel( + pub fn with_channel( rb_map: &M, poll_timeout_ms: u64, - ) -> Result<(Self, mpsc::Receiver)> { + ) -> Result<(Self, mpsc::Receiver)> { let (tx, rx) = 
mpsc::channel(); let poller = Self::new( rb_map, diff --git a/crates/codspeed-bpf/src/process_tracking.rs b/crates/codspeed-bpf/src/process_tracking.rs new file mode 100644 index 00000000..f5c08b79 --- /dev/null +++ b/crates/codspeed-bpf/src/process_tracking.rs @@ -0,0 +1,38 @@ +use anyhow::{Context, Result}; +use libbpf_rs::MapCore; + +/// Bump memlock rlimit to allow BPF ring buffer allocation +/// +/// BPF programs require the RLIMIT_MEMLOCK resource limit to be increased +/// to infinity to allow allocation of ring buffers. This function sets that limit. +pub fn bump_memlock_rlimit() -> Result<()> { + let rlimit = libc::rlimit { + rlim_cur: libc::RLIM_INFINITY, + rlim_max: libc::RLIM_INFINITY, + }; + + let ret = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlimit) }; + if ret != 0 { + anyhow::bail!("Failed to increase rlimit"); + } + + Ok(()) +} + +/// Trait for BPF programs that implement process tracking +pub trait ProcessTracking { + /// Get the tracked_pids map + fn tracked_pids_map(&self) -> &impl MapCore; + + /// Add a PID to track + fn add_tracked_pid(&self, pid: i32) -> Result<()> { + self.tracked_pids_map() + .update( + &pid.to_le_bytes(), + &1u8.to_le_bytes(), + libbpf_rs::MapFlags::ANY, + ) + .context("Failed to add PID to tracked set")?; + Ok(()) + } +} diff --git a/crates/exectrack/Cargo.toml b/crates/exectrack/Cargo.toml new file mode 100644 index 00000000..0f991a99 --- /dev/null +++ b/crates/exectrack/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "exectrack" +version = "0.1.0" +edition = "2024" +repository = "https://github.com/CodSpeedHQ/runner" + +[lib] +name = "exectrack" +path = "src/lib.rs" + +[[bin]] +name = "codspeed-exectrack" +path = "src/main.rs" + +[dependencies] +anyhow = { workspace = true } +clap = { workspace = true } +libc = { workspace = true } +log = { workspace = true } +env_logger = { workspace = true } +serde_json = { workspace = true } +serde = { workspace = true } +static_assertions = "1.1" +libbpf-rs = { version = 
"0.25.0", features = ["vendored"] } +codspeed-bpf = { path = "../codspeed-bpf" } +runner-shared = { path = "../runner-shared" } + +[build-dependencies] +codspeed-bpf = { path = "../codspeed-bpf", features = ["build"] } + +[dev-dependencies] +test-log = "0.2.19" diff --git a/crates/exectrack/build.rs b/crates/exectrack/build.rs new file mode 100644 index 00000000..31e3509f --- /dev/null +++ b/crates/exectrack/build.rs @@ -0,0 +1,4 @@ +fn main() { + codspeed_bpf::build::build_bpf("exectrack", "src/ebpf/c/exectrack.bpf.c"); + codspeed_bpf::build::generate_bindings("wrapper.h"); +} diff --git a/crates/exectrack/src/ebpf/c/event.h b/crates/exectrack/src/ebpf/c/event.h new file mode 100644 index 00000000..eda91221 --- /dev/null +++ b/crates/exectrack/src/ebpf/c/event.h @@ -0,0 +1,18 @@ +#ifndef __EXECTRACK_EVENT_H__ +#define __EXECTRACK_EVENT_H__ + +#define EVENT_TYPE_FORK 1 +#define EVENT_TYPE_EXEC 2 +#define EVENT_TYPE_EXIT 3 + +/* Event structure - shared between BPF and userspace */ +struct event { + uint8_t event_type; /* See EVENT_TYPE_* constants above */ + uint64_t timestamp; /* monotonic time in nanoseconds (CLOCK_MONOTONIC) */ + uint32_t pid; /* Process ID */ + uint32_t tid; /* Thread ID */ + uint32_t ppid; /* Parent Process ID (for fork events) */ + char comm[16]; /* Command name (null-terminated) */ +}; + +#endif /* __EXECTRACK_EVENT_H__ */ diff --git a/crates/exectrack/src/ebpf/c/exectrack.bpf.c b/crates/exectrack/src/ebpf/c/exectrack.bpf.c new file mode 100644 index 00000000..c608b54b --- /dev/null +++ b/crates/exectrack/src/ebpf/c/exectrack.bpf.c @@ -0,0 +1,90 @@ +// clang-format off +#include "vmlinux.h" +// clang-format on + +#include +#include +#include + +#include "event.h" + +// Include shared BPF utilities from codspeed-bpf +#include "codspeed/common.h" +#include "codspeed/process_tracking.h" + +char LICENSE[] SEC("license") = "GPL"; + +// Define standard process tracking maps using shared macro +PROCESS_TRACKING_MAPS(); + +// Define ring buffer for 
events +BPF_RINGBUF(events, 256 * 1024); + +/* Helper to submit an event to the ring buffer */ +static __always_inline int submit_event(__u8 event_type, __u32 pid, __u32 ppid) { + if (!is_tracked(pid) && !is_tracked(ppid)) { + return 0; + } + + struct event* e = bpf_ringbuf_reserve(&events, sizeof(*e), 0); + if (!e) { + return 0; + } + + __u64 tid_full = bpf_get_current_pid_tgid(); + + e->timestamp = bpf_ktime_get_ns(); + e->pid = pid; + e->tid = tid_full & 0xFFFFFFFF; + e->ppid = ppid; + e->event_type = event_type; + + // Get current command name + bpf_get_current_comm(e->comm, sizeof(e->comm)); + + bpf_ringbuf_submit(e, 0); + return 0; +} + +/* Track process creation via fork/clone */ +SEC("tracepoint/sched/sched_process_fork") +int tracepoint_sched_fork(struct trace_event_raw_sched_process_fork* ctx) { + __u32 parent_pid = ctx->parent_pid; + __u32 child_pid = ctx->child_pid; + + // Use shared fork handler to track child + if (handle_fork(parent_pid, child_pid)) { + // Submit fork event with parent/child relationship + submit_event(EVENT_TYPE_FORK, child_pid, parent_pid); + } + + return 0; +} + +/* Track process execution via execve */ +SEC("tracepoint/sched/sched_process_exec") +int tracepoint_sched_exec(struct trace_event_raw_sched_process_exec* ctx) { + __u64 tid = bpf_get_current_pid_tgid(); + __u32 pid = tid >> 32; + + if (is_tracked(pid)) { + submit_event(EVENT_TYPE_EXEC, pid, 0); + } + + return 0; +} + +/* Track process termination via exit */ +SEC("tracepoint/sched/sched_process_exit") +int tracepoint_sched_exit(struct trace_event_raw_sched_process_template* ctx) { + __u64 tid = bpf_get_current_pid_tgid(); + __u32 pid = tid >> 32; + + if (is_tracked(pid)) { + submit_event(EVENT_TYPE_EXIT, pid, 0); + // Use shared exit handler to clean up maps + handle_exit(pid); + } + + return 0; +} diff --git a/crates/exectrack/src/ebpf/events.rs b/crates/exectrack/src/ebpf/events.rs new file mode 100644 index 00000000..3656fe03 --- /dev/null +++ 
b/crates/exectrack/src/ebpf/events.rs @@ -0,0 +1,77 @@ +use serde::{Deserialize, Serialize}; + +// Include the bindings for event.h +pub mod bindings { + #![allow(non_upper_case_globals)] + #![allow(non_camel_case_types)] + #![allow(non_snake_case)] + #![allow(dead_code)] + + include!(concat!(env!("OUT_DIR"), "/event.rs")); +} +use bindings::*; + +#[repr(u8)] +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] +pub enum EventType { + Fork = EVENT_TYPE_FORK as u8, + Exec = EVENT_TYPE_EXEC as u8, + Exit = EVENT_TYPE_EXIT as u8, +} + +impl From for EventType { + fn from(val: u8) -> Self { + match val as u32 { + bindings::EVENT_TYPE_FORK => EventType::Fork, + bindings::EVENT_TYPE_EXEC => EventType::Exec, + bindings::EVENT_TYPE_EXIT => EventType::Exit, + _ => panic!("Unknown event type: {val}"), + } + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct Event { + pub event_type: u8, + pub timestamp: u64, + pub pid: u32, + pub tid: u32, + pub ppid: u32, + pub comm: [u8; 16], +} + +impl Event { + /// Get the event type as an enum + pub fn event_type(&self) -> EventType { + EventType::from(self.event_type) + } + + /// Get the command name as a string + pub fn comm_str(&self) -> &str { + let len = self.comm.iter().position(|&c| c == 0).unwrap_or(16); + std::str::from_utf8(&self.comm[..len]).unwrap_or("") + } +} + +// Static assertions for C/Rust ABI safety +mod assertions { + use super::*; + use static_assertions::{assert_eq_align, assert_eq_size, const_assert_eq}; + use std::mem::offset_of; + + assert_eq_size!(Event, bindings::event); + assert_eq_align!(Event, bindings::event); + + const_assert_eq!( + offset_of!(Event, timestamp), + offset_of!(bindings::event, timestamp) + ); + const_assert_eq!(offset_of!(Event, pid), offset_of!(bindings::event, pid)); + const_assert_eq!(offset_of!(Event, tid), offset_of!(bindings::event, tid)); + const_assert_eq!( + offset_of!(Event, event_type), + offset_of!(bindings::event, event_type) + ); + 
const_assert_eq!(offset_of!(Event, comm), offset_of!(bindings::event, comm)); +} diff --git a/crates/exectrack/src/ebpf/exectrack.rs b/crates/exectrack/src/ebpf/exectrack.rs new file mode 100644 index 00000000..a7f2b9fb --- /dev/null +++ b/crates/exectrack/src/ebpf/exectrack.rs @@ -0,0 +1,66 @@ +use anyhow::{Context, Result}; +use codspeed_bpf::ProcessTracking; +use codspeed_bpf::{RingBufferPoller, attach_tracepoint}; +use libbpf_rs::Link; +use libbpf_rs::skel::{OpenSkel, SkelBuilder}; +use std::mem::MaybeUninit; + +pub mod exectrack_skel { + include!(concat!(env!("OUT_DIR"), "/exectrack.skel.rs")); +} +pub use exectrack_skel::*; + +pub struct ExectrackBpf { + skel: Box>, + probes: Vec, +} + +impl ExectrackBpf { + pub fn new() -> Result { + let builder = ExectrackSkelBuilder::default(); + let open_object = Box::leak(Box::new(MaybeUninit::uninit())); + let open_skel = builder + .open(open_object) + .context("Failed to open exectrack BPF skeleton")?; + + let skel = Box::new( + open_skel + .load() + .context("Failed to load exectrack BPF skeleton")?, + ); + + Ok(Self { + skel, + probes: Vec::new(), + }) + } + + // Use the shared macro from codspeed-bpf for attaching tracepoints + attach_tracepoint!(attach_sched_fork, tracepoint_sched_fork); + attach_tracepoint!(attach_sched_exec, tracepoint_sched_exec); + attach_tracepoint!(attach_sched_exit, tracepoint_sched_exit); + + pub fn attach_tracepoints(&mut self) -> Result<()> { + self.attach_sched_fork()?; + self.attach_sched_exec()?; + self.attach_sched_exit()?; + Ok(()) + } + + /// Start polling with an mpsc channel for events + pub fn start_polling_with_channel( + &self, + poll_timeout_ms: u64, + ) -> Result<( + RingBufferPoller, + std::sync::mpsc::Receiver, + )> { + RingBufferPoller::with_channel(&self.skel.maps.events, poll_timeout_ms) + } +} + +impl ProcessTracking for ExectrackBpf { + fn tracked_pids_map(&self) -> &impl libbpf_rs::MapCore { + &self.skel.maps.tracked_pids + } +} diff --git 
a/crates/exectrack/src/ebpf/mod.rs b/crates/exectrack/src/ebpf/mod.rs new file mode 100644 index 00000000..747eb9c9 --- /dev/null +++ b/crates/exectrack/src/ebpf/mod.rs @@ -0,0 +1,7 @@ +pub mod events; +mod exectrack; +mod tracker; + +pub use events::{Event, EventType}; +pub use exectrack::ExectrackBpf; +pub use tracker::Tracker; diff --git a/crates/exectrack/src/ebpf/tracker.rs b/crates/exectrack/src/ebpf/tracker.rs new file mode 100644 index 00000000..e7040370 --- /dev/null +++ b/crates/exectrack/src/ebpf/tracker.rs @@ -0,0 +1,56 @@ +use crate::ebpf::{Event, ExectrackBpf}; +use anyhow::Result; +use codspeed_bpf::{ProcessTracking, RingBufferPoller, bump_memlock_rlimit}; +use log::debug; +use std::sync::mpsc::{self, Receiver}; + +pub struct Tracker { + bpf: ExectrackBpf, + poller: Option, +} + +impl Tracker { + /// Create a new tracker instance + /// + /// This will: + /// - Initialize the BPF subsystem + /// - Bump memlock limits + /// - Attach tracepoints for process tracking + pub fn new() -> Result { + // Bump memlock limits using shared utility + bump_memlock_rlimit()?; + + let mut bpf = ExectrackBpf::new()?; + bpf.attach_tracepoints()?; + + Ok(Self { bpf, poller: None }) + } + + /// Start tracking execution events for a specific PID + /// + /// Returns a receiver channel that will receive execution events. + /// The receiver will continue to produce events until the tracker is dropped. 
+ pub fn track(&mut self, pid: i32) -> Result> { + // Add the PID to track + self.bpf.add_tracked_pid(pid)?; + debug!("Tracking PID {pid} for execution events"); + + // Start polling with channel + let (poller, event_rx) = self.bpf.start_polling_with_channel(10)?; + self.poller = Some(poller); + + // Forward events from the poller channel to a new channel + // This allows the caller to drop the tracker and still receive events + // until the original channel closes + let (tx, rx) = mpsc::channel(); + std::thread::spawn(move || { + while let Ok(event) = event_rx.recv() { + if tx.send(event).is_err() { + break; + } + } + }); + + Ok(rx) + } +} diff --git a/crates/exectrack/src/hierarchy.rs b/crates/exectrack/src/hierarchy.rs new file mode 100644 index 00000000..ab7e38a4 --- /dev/null +++ b/crates/exectrack/src/hierarchy.rs @@ -0,0 +1,96 @@ +use crate::ebpf::{Event, EventType}; +use runner_shared::artifacts::{ProcessHierarchy, ProcessMetadata}; +use std::collections::HashMap; + +/// Builds and maintains a ProcessHierarchy from eBPF events +pub struct HierarchyBuilder { + hierarchy: ProcessHierarchy, + /// Maps PID to parent PID + parent_map: HashMap, +} + +impl HierarchyBuilder { + /// Create a new hierarchy builder for a root PID + pub fn new(root_pid: i32) -> Self { + Self { + hierarchy: ProcessHierarchy { + root_pid, + processes: HashMap::new(), + children: HashMap::new(), + }, + parent_map: HashMap::new(), + } + } + + /// Process an event and update the hierarchy + pub fn process_event(&mut self, event: &Event) { + let pid = event.pid as i32; + let comm = event.comm_str().to_string(); + let timestamp = event.timestamp; + + match event.event_type() { + EventType::Exec => { + // Update or create process metadata on exec + self.hierarchy + .processes + .entry(pid) + .and_modify(|p| p.name = comm.clone()) + .or_insert_with(|| ProcessMetadata { + pid, + name: comm, + start_time: timestamp, + exit_code: None, + stop_time: None, + }); + } + EventType::Fork => { + // Fork 
event - establish parent-child relationship + let ppid = event.ppid as i32; + + // Create or update process metadata for the child + if !self.hierarchy.processes.contains_key(&pid) { + self.hierarchy.processes.insert( + pid, + ProcessMetadata { + pid, + name: comm, + start_time: timestamp, + exit_code: None, + stop_time: None, + }, + ); + } + + // Register the parent-child relationship + self.register_parent_child(ppid, pid); + } + EventType::Exit => { + // Update process metadata with exit info + if let Some(metadata) = self.hierarchy.processes.get_mut(&pid) { + metadata.exit_code = Some(event.tid as i32); // Exit code in tid field + metadata.stop_time = Some(timestamp); + } + } + } + } + + /// Register a parent-child relationship + pub fn register_parent_child(&mut self, parent_pid: i32, child_pid: i32) { + self.parent_map.insert(child_pid, parent_pid); + self.hierarchy + .children + .entry(parent_pid) + .or_default() + .push(child_pid); + } + + /// Get the completed hierarchy + pub fn into_hierarchy(self) -> ProcessHierarchy { + self.hierarchy + } + + /// Get a reference to the current hierarchy + pub fn hierarchy(&self) -> &ProcessHierarchy { + &self.hierarchy + } +} diff --git a/crates/exectrack/src/lib.rs b/crates/exectrack/src/lib.rs new file mode 100644 index 00000000..22d6086f --- /dev/null +++ b/crates/exectrack/src/lib.rs @@ -0,0 +1,6 @@ +pub mod ebpf; +pub mod hierarchy; + +pub use ebpf::{Event, Tracker}; +pub use hierarchy::HierarchyBuilder; +pub use runner_shared::artifacts::{ProcessHierarchy, ProcessMetadata}; diff --git a/crates/exectrack/src/main.rs b/crates/exectrack/src/main.rs new file mode 100644 index 00000000..d8925b4d --- /dev/null +++ b/crates/exectrack/src/main.rs @@ -0,0 +1,64 @@ +use anyhow::Result; +use clap::Parser; +use exectrack::{HierarchyBuilder, Tracker}; +use log::{debug, info}; +use runner_shared::artifacts::ArtifactExt; +use std::path::PathBuf; +use std::process::Command; + +#[derive(Parser)] +#[command(name = 
"codspeed-exectrack")] +#[command(about = "Track process execution tree using eBPF")] +#[command(version)] +struct Cli { + /// Command to run and track + #[arg(trailing_var_arg = true, required = true)] + command: Vec, + + /// Output folder for the process hierarchy data + #[arg(short, long, default_value = ".")] + output: PathBuf, +} + +fn main() -> Result<()> { + env_logger::builder() + .parse_env(env_logger::Env::new().filter_or("CODSPEED_LOG", "info")) + .format_timestamp(None) + .init(); + + let cli = Cli::parse(); + + track_command(&cli.command, &cli.output) +} + +fn track_command(command: &[String], output_dir: &PathBuf) -> Result<()> { + info!("Starting exectrack for command: {command:?}"); + + let mut tracker = Tracker::new()?; + + // FIXME: Start this with SIGKILL + let mut child = Command::new(&command[0]).args(&command[1..]).spawn()?; + + let root_pid = child.id() as i32; + let events = tracker.track(root_pid)?; + let process_thread = std::thread::spawn(move || { + let mut builder = HierarchyBuilder::new(root_pid); + for event in events { + builder.process_event(&event); + } + builder.into_hierarchy() + }); + let status = child.wait()?; + debug!("Process exited with status: {status:?}"); + + // Drop the tracker to close the event channel and allow the event processing thread to complete + drop(tracker); + + // Print and save the process hierarchy + let hierarchy = process_thread.join().unwrap(); + + debug!("Process hierarchy: {hierarchy:#?}"); + hierarchy.save_with_pid_to(output_dir, root_pid)?; + + Ok(()) +} diff --git a/crates/exectrack/tests/integration_test.rs b/crates/exectrack/tests/integration_test.rs new file mode 100644 index 00000000..3bf1b787 --- /dev/null +++ b/crates/exectrack/tests/integration_test.rs @@ -0,0 +1,255 @@ +use anyhow::Context; +use exectrack::{HierarchyBuilder, Tracker}; +use std::process::Command; + +/// Helper to track a command and build its process hierarchy +pub fn track_command( + command: &str, + args: &[&str], +) -> 
anyhow::Result<( + runner_shared::artifacts::ProcessHierarchy, + std::thread::JoinHandle<()>, +)> { + // Create tracker FIRST, before spawning the child + let mut tracker = Tracker::new()?; + + // Now spawn the child + let mut child = Command::new(command) + .args(args) + .spawn() + .context("Failed to spawn command")?; + let root_pid = child.id() as i32; + + // Track the child process + let rx = tracker.track(root_pid)?; + + // Build hierarchy from events in a separate thread (like the CLI does) + let hierarchy_thread = std::thread::spawn(move || { + let mut builder = HierarchyBuilder::new(root_pid); + for event in rx { + builder.process_event(&event); + } + builder.into_hierarchy() + }); + + // Wait for child to complete + let _ = child.wait()?; + + // Drop tracker to close the event channel + drop(tracker); + + // Get the hierarchy + let hierarchy = hierarchy_thread.join().unwrap(); + + eprintln!("Tracked {} processes", hierarchy.processes.len()); + + // Return a dummy thread handle + let thread_handle = std::thread::spawn(|| {}); + + Ok((hierarchy, thread_handle)) +} + +// ============================================================================ +// INTEGRATION TESTS - CHILD PROCESS TRACKING +// ============================================================================ + +/// Test that a single process (no children) is tracked correctly +#[test_log::test] +fn test_single_process_no_children() -> anyhow::Result<()> { + let (hierarchy, thread_handle) = track_command("sleep", &["1"])?; + + // Should have the root process + assert!( + hierarchy.processes.contains_key(&hierarchy.root_pid), + "Root process should be tracked" + ); + + // Should have no children + assert!( + hierarchy.children.is_empty(), + "Single process should have no children" + ); + + thread_handle.join().unwrap(); + Ok(()) +} + +/// Test that bash spawning a single child process is tracked +#[test_log::test] +fn test_bash_single_child() -> anyhow::Result<()> { + // Use a subshell to force a 
fork + let (hierarchy, thread_handle) = track_command("bash", &["-c", "(sleep 0.5)"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Should have at least 2 processes (bash + sleep or bash + subshell) + assert!( + hierarchy.processes.len() >= 2, + "Expected at least 2 processes, got {}", + hierarchy.processes.len() + ); + + // Should have parent-child relationships + assert!( + !hierarchy.children.is_empty(), + "Expected parent-child relationships to be tracked" + ); + + thread_handle.join().unwrap(); + Ok(()) +} + +/// Test that bash spawning multiple children is tracked +#[test_log::test] +fn test_bash_multiple_children() -> anyhow::Result<()> { + let (hierarchy, thread_handle) = track_command("bash", &["-c", "sleep 0.5 & sleep 1"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Should have at least 2 processes (bash + at least one sleep) + // Note: May not capture all children due to timing + assert!( + hierarchy.processes.len() >= 2, + "Expected at least 2 processes (bash + children), got {}", + hierarchy.processes.len() + ); + + // Should have parent-child relationships + assert!( + !hierarchy.children.is_empty(), + "Expected parent-child relationships to be tracked" + ); + + // Find the bash process + let bash_pids: Vec<_> = hierarchy + .processes + .iter() + .filter(|(_, meta)| meta.name.contains("bash")) + .map(|(pid, _)| pid) + .collect(); + + assert!( + !bash_pids.is_empty(), + "Expected to find bash process in hierarchy" + ); + + // Check that bash has at least 1 child + for bash_pid in bash_pids { + if let Some(children) = hierarchy.children.get(bash_pid) { + eprintln!("Bash PID {} has {} children", bash_pid, children.len()); + if !children.is_empty() { + // Found the parent with children + thread_handle.join().unwrap(); + return Ok(()); + } + } + } + + panic!("Expected bash to have at least 1 child"); +} + +/// Test nested process hierarchy (bash -> bash -> sleep) +#[test_log::test] +fn test_nested_process_hierarchy() -> anyhow::Result<()> { 
+ let (hierarchy, thread_handle) = track_command("bash", &["-c", "bash -c '(sleep 0.5)'"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Should have at least 2 processes (may have exec optimization) + assert!( + hierarchy.processes.len() >= 2, + "Expected at least 2 processes, got {}", + hierarchy.processes.len() + ); + + // Should have parent-child relationships + assert!( + !hierarchy.children.is_empty(), + "Expected parent-child relationships to be tracked" + ); + + thread_handle.join().unwrap(); + Ok(()) +} + +/// Test that exit codes are captured +#[test_log::test] +fn test_exit_code_capture() -> anyhow::Result<()> { + let (hierarchy, thread_handle) = track_command("bash", &["-c", "exit 42"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Find any process with an exit code + let has_exit_code = hierarchy + .processes + .values() + .any(|meta| meta.exit_code.is_some()); + + assert!( + has_exit_code, + "Expected at least one process to have an exit code" + ); + + thread_handle.join().unwrap(); + Ok(()) +} + +/// Test with sh instead of bash +#[test_log::test] +fn test_sh_with_children() -> anyhow::Result<()> { + // Use subshell to force fork + let (hierarchy, thread_handle) = track_command("sh", &["-c", "(sleep 0.5)"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Should have at least 2 processes (sh + sleep or subshell) + assert!( + hierarchy.processes.len() >= 2, + "Expected at least 2 processes (sh + sleep), got {}", + hierarchy.processes.len() + ); + + thread_handle.join().unwrap(); + Ok(()) +} + +/// Test process names are captured correctly +#[test_log::test] +fn test_process_names_captured() -> anyhow::Result<()> { + let (hierarchy, thread_handle) = track_command("sleep", &["0.5"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Should find processes with expected names + let has_sleep = hierarchy + .processes + .values() + .any(|meta| meta.name == "sleep"); + + assert!(has_sleep, "Expected to find 'sleep' process in hierarchy"); 
+ + thread_handle.join().unwrap(); + Ok(()) +} + +/// Test that every tracked process has a non-zero start time (stop times are not asserted here) +#[test_log::test] +fn test_timestamps_recorded() -> anyhow::Result<()> { + let (hierarchy, thread_handle) = track_command("sleep", &["0.1"])?; + + eprintln!("Hierarchy: {hierarchy:#?}"); + + // Check that every tracked process has a start timestamp + for (pid, meta) in &hierarchy.processes { + eprintln!("PID {} has start time: {} ns", pid, meta.start_time); + // Start time should be non-zero (nanoseconds; NOTE(review): the original comment said "since epoch", but the clock base is not visible here, confirm) + assert!( + meta.start_time > 0, + "Process {} should have valid start time", + pid + ); + } + + thread_handle.join().unwrap(); + Ok(()) +} diff --git a/crates/exectrack/wrapper.h b/crates/exectrack/wrapper.h new file mode 100644 index 00000000..541d55a1 --- /dev/null +++ b/crates/exectrack/wrapper.h @@ -0,0 +1,2 @@ +#include +#include "src/ebpf/c/event.h" diff --git a/crates/memtrack/AGENTS.md b/crates/memtrack/AGENTS.md new file mode 120000 index 00000000..681311eb --- /dev/null +++ b/crates/memtrack/AGENTS.md @@ -0,0 +1 @@ +CLAUDE.md \ No newline at end of file diff --git a/crates/memtrack/CLAUDE.md b/crates/memtrack/CLAUDE.md new file mode 100644 index 00000000..e8b1a305 --- /dev/null +++ b/crates/memtrack/CLAUDE.md @@ -0,0 +1,171 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Common Development Commands + +### Building and Testing +```bash +# Build the project +cargo build + +# Build in release mode (required for BPF programs) +cargo build --release + +# Run tests (requires sudo due to BPF requirements) +cargo build --release +sudo cargo test -- --ignored --test-threads=1 + +# Run the memtrack CLI to track memory allocations +cargo build --release +sudo ./target/release/codspeed-memtrack track [args...]
--output <output-file> + +# Or avoid sudo by granting capabilities (Linux 5.8+) +sudo setcap cap_bpf,cap_perfmon+ep ./target/release/codspeed-memtrack +./target/release/codspeed-memtrack track <command> --output <output-file> + +# Run with debug logging +CODSPEED_LOG=debug sudo ./target/release/codspeed-memtrack track <command> +``` + +### Linting and Formatting +```bash +# Check code formatting +cargo fmt -- --check + +# Format code +cargo fmt + +# Run clippy lints +cargo clippy +``` + +### Running Examples + +#### CLI Examples +```bash +# Run the allocs example (demonstrates various allocation patterns) +cargo build --release --bins --examples && sudo ./target/release/codspeed-memtrack track ./target/release/examples/allocs --output allocs_output.jsonl + +# Run the spawn example (tests child process tracking) +cargo build --release --bins --examples && sudo ./target/release/codspeed-memtrack track ./target/release/examples/spawn --output spawn_output.jsonl + +# Run the visualize example (generates an image from allocation data) +cargo build --release --bins --examples && sudo ./target/release/codspeed-memtrack track ./target/release/examples/visualize --output vis_output.jsonl +``` + +#### Library API Examples +```bash +# Run the library_usage example (demonstrates tracking a spawned process) +cargo build --release --examples && sudo ./target/release/examples/library_usage + +# Run the track_multiple example (demonstrates tracking multiple processes) +cargo build --release --examples && sudo ./target/release/examples/track_multiple +``` + +## Architecture Overview + +This crate implements a BPF-based memory allocation tracker that monitors malloc/free calls and other memory allocation functions in Linux processes using eBPF uprobes. + +### Core Components + +1. 
**High-Level API** (`src/ebpf/tracker.rs`): + - `Tracker`: High-level API for tracking processes by PID + - `track(pid)`: Convenience function to track a single process + - `track_multiple(pids)`: Track multiple processes simultaneously + - Returns `Receiver<Event>` for consuming allocation events + - Handles BPF initialization, libc attachment, and event polling automatically + +2. **BPF Layer** (`src/ebpf/memtrack.rs` and `src/ebpf/c/*.c`): + - `MemtrackBpf`: Main interface for managing BPF programs and probes + - Attaches uprobes to libc functions: `malloc`, `free`, `calloc`, `realloc`, `aligned_alloc` + - Attaches to `sched_process_fork` tracepoint for process hierarchy tracking + - Uses BPF maps to track PIDs and ring buffers to send events to userspace + - Built with libbpf and generated into skeleton code at build time + +3. **Event System** (`src/ebpf/events.rs`): + - `Event`: Serializable struct containing allocation events + - `EventType`: Enum distinguishing allocation types (Malloc, Free, Calloc, Realloc, AlignedAlloc, Execve) + - Generated constants from `src/ebpf/c/event.h` compiled during build + +4. **Ring Buffer Poller** (provided by the shared `codspeed-bpf` crate): + - `RingBufferPoller`: Manages BPF ring buffer polling in a background thread + - `EventHandler`: Callback-based or channel-based event processing + - Graceful shutdown with atomic flags and thread joining + +5. **LibC Path Detection** (`src/libc.rs`): + - Finds libc.so.6 across standard Linux paths and NixOS nix/store + - Handles multiple glibc versions in the same system + +6. 
**Command Interface** (`src/main.rs`): + - CLI entry point with `track` subcommand + - Uses the high-level `Tracker` API internally + - Spawns target process, writes JSONL output + - Manages process lifetime and output file handling + +### BPF Program Details + +The BPF kernel code (`src/ebpf/c/memtrack.bpf.c`) implements: +- **PID Tracking**: Tracks directly added PIDs and auto-tracks child processes via fork tracepoint +- **Process Hierarchy**: Maintains parent-child relationships to track process trees +- **BPF Maps**: + - `tracked_pids`: Hash map of PIDs to track (10000 entries) + - `pids_ppid`: Parent PID mapping for process hierarchy + - `events`: Ring buffer (256KB) for sending events to userspace +- **Uprobes**: Attached to libc allocation functions to capture event data + +### Build Process + +The `build.rs` script (through the shared `codspeed_bpf::build` helpers): +1. Generates BPF skeleton from `memtrack.bpf.c` using libbpf-cargo +2. Generates Rust bindings from `event.h` (included via `wrapper.h`) using bindgen +3. Outputs generated code to `OUT_DIR` + +## Key Concepts + +### Ring Buffer Polling +The poller runs in a background thread with a configurable timeout (in main.rs, set to 10ms). Events are pulled from the BPF ring buffer and sent through an mpsc channel to the main thread for writing to the output file. + +### Memory Locking +The process bumps the `RLIMIT_MEMLOCK` resource limit to infinity to allow BPF ring buffer allocation (256KB). + +### JSONL Output +Events are serialized to JSONL (JSON Lines) format with one `Event` per line for streaming processing and easy parsing. 
+ +## Using memtrack as a Library + +memtrack can be used as a library to track memory allocations in your own Rust programs: + +```rust +use memtrack; +use std::process::Command; + +// Spawn a process +let child = Command::new("./my_program").spawn()?; +let pid = child.id() as i32; + +// Track its allocations +let events = memtrack::track(pid)?; + +// Process events +for event in events { + println!("Allocation: {:?}", event); +} +``` + +For more examples, see `examples/library_usage.rs` and `examples/track_multiple.rs`. + +## Testing + +- Integration tests in `tests/integration_test.rs` require sudo to run +- Run with: `cargo build --release && sudo cargo test -- --ignored --test-threads=1` +- Examples in `examples/` demonstrate allocation patterns that can be tracked +- Library API examples show how to use memtrack programmatically + +## Important Notes + +- **Requires elevated privileges**: BPF programs and uprobes require root or CAP_BPF/CAP_PERFMON capabilities + - On Linux 5.8+, you can avoid sudo by granting capabilities: `sudo setcap cap_bpf,cap_perfmon+ep ` +- **Linux-only**: Uses BPF/uprobes which are Linux-specific +- **CPU count**: Ring buffer sizes scale with CPU count in the BPF kernel code +- **NixOS compatibility**: Includes special handling for finding libc in NixOS nix/store diff --git a/crates/memtrack/Cargo.toml b/crates/memtrack/Cargo.toml index abd73e02..2c529439 100644 --- a/crates/memtrack/Cargo.toml +++ b/crates/memtrack/Cargo.toml @@ -19,6 +19,7 @@ ebpf = [ "dep:libbpf-rs", "dep:libbpf-cargo", "dep:vmlinux", + "dep:codspeed-bpf", ] [dependencies] @@ -35,11 +36,13 @@ static_assertions = "1.1" itertools = { workspace = true } paste = "1.0" libbpf-rs = { version = "0.25.0", features = ["vendored"], optional = true } +codspeed-bpf = { path = "../codspeed-bpf", optional = true } [build-dependencies] libbpf-cargo = { version = "0.25.0", optional = true } vmlinux = { git = "https://github.com/libbpf/vmlinux.h.git", rev = 
"991dd4b8dfd8c9d62ce8999521b24f61d9b7fc52", optional = true } bindgen = "0.71" +codspeed-bpf = { path = "../codspeed-bpf", features = ["build"], optional = true } [dev-dependencies] tempfile = "3.8" diff --git a/crates/memtrack/build.rs b/crates/memtrack/build.rs index 3f9b2b24..454cf8c1 100644 --- a/crates/memtrack/build.rs +++ b/crates/memtrack/build.rs @@ -1,36 +1,8 @@ #[cfg(feature = "ebpf")] -use std::{env, path::PathBuf}; - -#[cfg(feature = "ebpf")] -fn build_ebpf() { - use libbpf_cargo::SkeletonBuilder; - - println!("cargo:rerun-if-changed=src/ebpf/c"); - - // Build the BPF program - let arch = env::var("CARGO_CFG_TARGET_ARCH") - .expect("CARGO_CFG_TARGET_ARCH must be set in build script"); - let memtrack_out = PathBuf::from(env::var("OUT_DIR").unwrap()).join("memtrack.skel.rs"); - SkeletonBuilder::new() - .source("src/ebpf/c/memtrack.bpf.c") - .clang_args([ - "-I", - &vmlinux::include_path_root().join(arch).to_string_lossy(), - ]) - .build_and_generate(&memtrack_out) - .unwrap(); - - // Generate bindings for event.h - let bindings = bindgen::Builder::default() - .header("wrapper.h") - .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) - .generate() - .expect("Unable to generate bindings"); - let out_file = PathBuf::from(env::var("OUT_DIR").unwrap()).join("event.rs"); - std::fs::write(&out_file, bindings.to_string()).expect("Couldn't write bindings!"); -} - fn main() { - #[cfg(feature = "ebpf")] - build_ebpf(); + codspeed_bpf::build::build_bpf("memtrack", "src/ebpf/c/memtrack.bpf.c"); + codspeed_bpf::build::generate_bindings("wrapper.h"); } + +#[cfg(not(feature = "ebpf"))] +fn main() {} diff --git a/crates/memtrack/src/ebpf/c/memtrack.bpf.c b/crates/memtrack/src/ebpf/c/memtrack.bpf.c index 63c5be5f..acc9ba23 100644 --- a/crates/memtrack/src/ebpf/c/memtrack.bpf.c +++ b/crates/memtrack/src/ebpf/c/memtrack.bpf.c @@ -10,79 +10,30 @@ #include "event.h" -char LICENSE[] SEC("license") = "GPL"; +// Include shared BPF utilities from codspeed-bpf +#include 
"codspeed/common.h" +#include "codspeed/process_tracking.h" -/* Macros for common map definitions */ -#define BPF_HASH_MAP(name, key_type, value_type, max_ents) \ - struct { \ - __uint(type, BPF_MAP_TYPE_HASH); \ - __uint(max_entries, max_ents); \ - __type(key, key_type); \ - __type(value, value_type); \ - } name SEC(".maps") - -#define BPF_ARRAY_MAP(name, value_type, max_ents) \ - struct { \ - __uint(type, BPF_MAP_TYPE_ARRAY); \ - __uint(max_entries, max_ents); \ - __type(key, __u32); \ - __type(value, value_type); \ - } name SEC(".maps") - -#define BPF_RINGBUF(name, size) \ - struct { \ - __uint(type, BPF_MAP_TYPE_RINGBUF); \ - __uint(max_entries, size); \ - } name SEC(".maps") - -BPF_HASH_MAP(tracked_pids, __u32, __u8, 10000); /* Map to store PIDs we're tracking */ -BPF_HASH_MAP(pids_ppid, __u32, __u32, 10000); /* Map to store parent-child relationships to detect hierarchy */ -BPF_RINGBUF(events, 256 * 1024); /* Ring buffer for sending events to userspace */ -BPF_ARRAY_MAP(tracking_enabled, __u8, 1); /* Map to control whether tracking is enabled (0 = disabled, 1 - = enabled) */ +char LICENSE[] SEC("license") = "GPL"; -/* == Code that tracks process forks and execs == */ +/* Define process tracking maps using shared macro */ +PROCESS_TRACKING_MAPS(); -/* Helper to check if a PID or any of its ancestors should be tracked */ -static __always_inline int is_tracked(__u32 pid) { - /* Direct check */ - if (bpf_map_lookup_elem(&tracked_pids, &pid)) { - return 1; - } +/* Ring buffer for sending events to userspace */ +BPF_RINGBUF(events, 256 * 1024); -/* Check parent recursively (up to 5 levels) */ -#pragma unroll - for (int i = 0; i < 5; i++) { - __u32* ppid = bpf_map_lookup_elem(&pids_ppid, &pid); - if (!ppid) { - break; - } - pid = *ppid; - if (bpf_map_lookup_elem(&tracked_pids, &pid)) { - return 1; - } - } +/* Map to control whether tracking is enabled (0 = disabled, 1 = enabled) */ +BPF_ARRAY_MAP(tracking_enabled, __u8, 1); - return 0; -} +/* == Code that tracks 
process forks and execs == */ SEC("tracepoint/sched/sched_process_fork") int tracepoint_sched_fork(struct trace_event_raw_sched_process_fork* ctx) { __u32 parent_pid = ctx->parent_pid; __u32 child_pid = ctx->child_pid; - /* Print process fork with PIDs */ - // bpf_printk("sched_fork: parent_pid=%u child_pid=%u", parent_pid, child_pid); - - /* Check if parent is being tracked */ - if (is_tracked(parent_pid)) { - /* Auto-track this child */ - __u8 marker = 1; - bpf_map_update_elem(&tracked_pids, &child_pid, &marker, BPF_ANY); - bpf_map_update_elem(&pids_ppid, &child_pid, &parent_pid, BPF_ANY); - - // bpf_printk("auto-tracking child process: child_pid=%u", child_pid); - } + /* Use shared fork handler */ + handle_fork(parent_pid, child_pid); return 0; } diff --git a/crates/memtrack/src/ebpf/memtrack.rs b/crates/memtrack/src/ebpf/memtrack.rs index c9ac47da..d8d06e59 100644 --- a/crates/memtrack/src/ebpf/memtrack.rs +++ b/crates/memtrack/src/ebpf/memtrack.rs @@ -1,139 +1,23 @@ use anyhow::Context; use anyhow::Result; +use codspeed_bpf::ProcessTracking; +use codspeed_bpf::RingBufferPoller; use libbpf_rs::Link; +use libbpf_rs::MapCore; use libbpf_rs::skel::OpenSkel; use libbpf_rs::skel::SkelBuilder; -use libbpf_rs::{MapCore, UprobeOpts}; use log::warn; -use paste::paste; use std::mem::MaybeUninit; use std::path::Path; -use crate::ebpf::poller::RingBufferPoller; +// Use shared macros from codspeed-bpf +use codspeed_bpf::{attach_tracepoint, attach_uprobe, attach_uprobe_uretprobe}; pub mod memtrack_skel { include!(concat!(env!("OUT_DIR"), "/memtrack.skel.rs")); } pub use memtrack_skel::*; -/// Macro to attach a function with both entry and return probes -macro_rules! 
attach_uprobe_uretprobe { - ($name:ident, $prog_entry:ident, $prog_return:ident, $func_str:expr) => { - fn $name(&mut self, libc_path: &Path) -> Result<()> { - let link = self - .skel - .progs - .$prog_entry - .attach_uprobe_with_opts( - -1, - libc_path, - 0, - UprobeOpts { - func_name: Some($func_str.to_string()), - retprobe: false, - ..Default::default() - }, - ) - .context(format!( - "Failed to attach {} uprobe in {}", - $func_str, - libc_path.display() - ))?; - self.probes.push(link); - - let link = self - .skel - .progs - .$prog_return - .attach_uprobe_with_opts( - -1, - libc_path, - 0, - UprobeOpts { - func_name: Some($func_str.to_string()), - retprobe: true, - ..Default::default() - }, - ) - .context(format!( - "Failed to attach {} uretprobe in {}", - $func_str, - libc_path.display() - ))?; - self.probes.push(link); - - Ok(()) - } - }; - ($name:ident) => { - paste! { - attach_uprobe_uretprobe!( - [], - [], - [], - stringify!($name) - ); - } - }; -} - -macro_rules! attach_uprobe { - ($name:ident, $prog:ident, $func_str:expr) => { - fn $name(&mut self, libc_path: &Path) -> Result<()> { - let link = self - .skel - .progs - .$prog - .attach_uprobe_with_opts( - -1, - libc_path, - 0, - UprobeOpts { - func_name: Some($func_str.to_string()), - retprobe: false, - ..Default::default() - }, - ) - .context(format!( - "Failed to attach {} uprobe in {}", - $func_str, - libc_path.display() - ))?; - self.probes.push(link); - Ok(()) - } - }; - ($name:ident) => { - paste! { - attach_uprobe!( - [], - [], - stringify!($name) - ); - } - }; -} - -macro_rules! attach_tracepoint { - ($func:ident, $prog:ident) => { - fn $func(&mut self) -> Result<()> { - let link = self - .skel - .progs - .$prog - .attach() - .context(format!("Failed to attach {} tracepoint", stringify!($prog)))?; - self.probes.push(link); - Ok(()) - } - }; - ($name:ident) => { - paste! 
{ - attach_tracepoint!([], []); - } - }; -} - pub struct MemtrackBpf { skel: Box>, probes: Vec, @@ -160,20 +44,6 @@ impl MemtrackBpf { }) } - pub fn add_tracked_pid(&mut self, pid: i32) -> Result<()> { - self.skel - .maps - .tracked_pids - .update( - &pid.to_le_bytes(), - &1u8.to_le_bytes(), - libbpf_rs::MapFlags::ANY, - ) - .context("Failed to add PID to uprobes tracked set")?; - - Ok(()) - } - /// Enable event tracking pub fn enable_tracking(&mut self) -> Result<()> { let key = 0u32; @@ -243,6 +113,12 @@ impl MemtrackBpf { } } +impl ProcessTracking for MemtrackBpf { + fn tracked_pids_map(&self) -> &impl MapCore { + &self.skel.maps.tracked_pids + } +} + impl Drop for MemtrackBpf { fn drop(&mut self) { if self.probes.len() > 10 { diff --git a/crates/memtrack/src/ebpf/mod.rs b/crates/memtrack/src/ebpf/mod.rs index 60747327..494f3304 100644 --- a/crates/memtrack/src/ebpf/mod.rs +++ b/crates/memtrack/src/ebpf/mod.rs @@ -1,6 +1,5 @@ mod events; mod memtrack; -mod poller; mod tracker; pub use events::{Event, EventType}; diff --git a/crates/memtrack/src/ebpf/tracker.rs b/crates/memtrack/src/ebpf/tracker.rs index 5c864918..fec06668 100644 --- a/crates/memtrack/src/ebpf/tracker.rs +++ b/crates/memtrack/src/ebpf/tracker.rs @@ -1,7 +1,8 @@ use crate::ebpf::{Event, MemtrackBpf}; use anyhow::Result; +use codspeed_bpf::ProcessTracking; use log::debug; -use std::sync::mpsc::{self, Receiver}; +use std::sync::mpsc::Receiver; pub struct Tracker { bpf: MemtrackBpf, @@ -16,8 +17,8 @@ impl Tracker { /// - Attach uprobes to all libc instances /// - Attach tracepoints for fork tracking pub fn new() -> Result { - // Bump memlock limits - Self::bump_memlock_rlimit()?; + // Bump memlock limits using shared utility + codspeed_bpf::bump_memlock_rlimit()?; // Find and attach to all libc instances let libc_paths = crate::libc::find_libc_paths()?; @@ -48,7 +49,7 @@ impl Tracker { // Keep the poller alive by moving it into the channel // When the receiver is dropped, the poller will also be 
dropped - let (tx, rx) = mpsc::channel(); + let (tx, rx) = std::sync::mpsc::channel(); std::thread::spawn(move || { // Keep poller alive let _p = _poller; @@ -62,20 +63,6 @@ impl Tracker { Ok(rx) } - fn bump_memlock_rlimit() -> Result<()> { - let rlimit = libc::rlimit { - rlim_cur: libc::RLIM_INFINITY, - rlim_max: libc::RLIM_INFINITY, - }; - - let ret = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlimit) }; - if ret != 0 { - anyhow::bail!("Failed to increase rlimit"); - } - - Ok(()) - } - /// Enable event tracking in the BPF program pub fn enable(&mut self) -> anyhow::Result<()> { self.bpf.enable_tracking() diff --git a/crates/runner-shared/src/artifacts/mod.rs b/crates/runner-shared/src/artifacts/mod.rs index a2d015bf..0a9a5983 100644 --- a/crates/runner-shared/src/artifacts/mod.rs +++ b/crates/runner-shared/src/artifacts/mod.rs @@ -4,9 +4,11 @@ use serde::Serialize; mod execution_timestamps; mod memtrack; +mod process_hierarchy; pub use execution_timestamps::*; pub use memtrack::*; +pub use process_hierarchy::{ProcessHierarchy, ProcessMetadata}; pub trait ArtifactExt where diff --git a/crates/runner-shared/src/artifacts/process_hierarchy.rs b/crates/runner-shared/src/artifacts/process_hierarchy.rs new file mode 100644 index 00000000..2b919461 --- /dev/null +++ b/crates/runner-shared/src/artifacts/process_hierarchy.rs @@ -0,0 +1,24 @@ +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Information about a single process +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProcessMetadata { + pub pid: i32, + pub name: String, + pub start_time: u64, + pub exit_code: Option, + pub stop_time: Option, +} + +/// Tree-like structure tracking process execution hierarchy +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProcessHierarchy { + pub root_pid: i32, + /// Map of PID to process metadata + pub processes: HashMap, + /// Map of parent PID to list of child PIDs + pub children: HashMap>, +} + +impl super::ArtifactExt for 
ProcessHierarchy {} diff --git a/src/run/runner/wall_time/executor.rs b/src/run/runner/wall_time/executor.rs index 4d586817..be753380 100644 --- a/src/run/runner/wall_time/executor.rs +++ b/src/run/runner/wall_time/executor.rs @@ -99,6 +99,21 @@ impl WallTimeExecutor { } } + fn wrap_with_exectrack( + cmd_builder: CommandBuilder, + profile_folder: &Path, + ) -> Result { + let results_dir = profile_folder.join("results"); + std::fs::create_dir_all(&results_dir)?; + + let mut exectrack_builder = CommandBuilder::new("codspeed-exectrack"); + exectrack_builder.arg("--output"); + exectrack_builder.arg(results_dir.to_string_lossy().to_string()); + exectrack_builder.arg("--"); + exectrack_builder.wrap_with(cmd_builder); + Ok(exectrack_builder) + } + fn walltime_bench_cmd( config: &Config, run_data: &RunData, @@ -189,6 +204,8 @@ impl Executor for WallTimeExecutor { let (_env_file, _script_file, cmd_builder) = WallTimeExecutor::walltime_bench_cmd(config, run_data)?; + + let cmd_builder = Self::wrap_with_exectrack(cmd_builder, &run_data.profile_folder)?; if let Some(perf) = &self.perf && config.enable_perf {