Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[crashtracker] Enable client to configure which signals to trap #856

Merged
merged 25 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions bin_tests/src/bin/crashtracker_bin_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ fn main() -> anyhow::Result<()> {
mod unix {
use anyhow::Context;
use bin_tests::modes::behavior::get_behavior;
use nix::sys::signal::{raise, Signal};
danielsn marked this conversation as resolved.
Show resolved Hide resolved
use std::env;
use std::path::Path;

Expand All @@ -24,8 +25,9 @@ mod unix {
const TEST_COLLECTOR_TIMEOUT_MS: u32 = 10_000;

#[inline(never)]
unsafe fn deref_ptr(p: *mut u8) {
unsafe fn deref_ptr(p: *mut u8) -> u8 {
*std::hint::black_box(p) = std::hint::black_box(1);
*std::hint::black_box(p)
}

pub fn main() -> anyhow::Result<()> {
Expand All @@ -34,6 +36,7 @@ mod unix {
let receiver_binary = args.next().context("Unexpected number of arguments")?;
let output_dir = args.next().context("Unexpected number of arguments")?;
let mode_str = args.next().context("Unexpected number of arguments")?;
let crash_typ = args.next().context("Missing crash type")?;
anyhow::ensure!(args.next().is_none(), "unexpected extra arguments");

let stderr_filename = format!("{output_dir}/out.stderr");
Expand All @@ -54,6 +57,7 @@ mod unix {
create_alt_stack: true,
use_alt_stack: true,
resolve_frames: crashtracker::StacktraceCollection::WithoutSymbols,
signals: crashtracker::default_signals(),
endpoint,
timeout_ms: TEST_COLLECTOR_TIMEOUT_MS,
unix_socket_path: Some("".to_string()),
Expand Down Expand Up @@ -95,11 +99,18 @@ mod unix {
behavior.post(output_dir)?;

crashtracker::begin_op(crashtracker::OpTypes::ProfilerCollectingSample)?;
unsafe {
deref_ptr(std::ptr::null_mut::<u8>());
match crash_typ.as_str() {
"null_deref" => {
let x = unsafe { deref_ptr(std::ptr::null_mut::<u8>()) };
println!("{x}");
}
"sigabrt" => raise(Signal::SIGABRT)?,
"sigill" => raise(Signal::SIGILL)?,
"sigbus" => raise(Signal::SIGBUS)?,
"sigsegv" => raise(Signal::SIGSEGV)?,
_ => anyhow::bail!("Unexpected crash_typ: {crash_typ}"),
}
crashtracker::end_op(crashtracker::OpTypes::ProfilerCollectingSample)?;

Ok(())
}
}
203 changes: 145 additions & 58 deletions bin_tests/tests/crashtracker_bin_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,62 +11,95 @@ use std::{fs, path::PathBuf};

use anyhow::Context;
use bin_tests::{build_artifacts, ArtifactType, ArtifactsBuild, BuildProfile};
use serde_json::Value;

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_debug() {
test_crash_tracking_bin(BuildProfile::Debug, "donothing");
test_crash_tracking_bin(BuildProfile::Debug, "donothing", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigpipe() {
test_crash_tracking_bin(BuildProfile::Debug, "sigpipe");
test_crash_tracking_bin(BuildProfile::Debug, "sigpipe", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigchld() {
test_crash_tracking_bin(BuildProfile::Debug, "sigchld");
test_crash_tracking_bin(BuildProfile::Debug, "sigchld", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigchld_exec() {
test_crash_tracking_bin(BuildProfile::Debug, "sigchld_exec");
test_crash_tracking_bin(BuildProfile::Debug, "sigchld_exec", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigstack() {
test_crash_tracking_bin(BuildProfile::Release, "donothing_sigstack");
test_crash_tracking_bin(BuildProfile::Release, "donothing_sigstack", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigpipe_sigstack() {
test_crash_tracking_bin(BuildProfile::Release, "sigpipe_sigstack");
test_crash_tracking_bin(BuildProfile::Release, "sigpipe_sigstack", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigchld_sigstack() {
test_crash_tracking_bin(BuildProfile::Release, "sigchld_sigstack");
test_crash_tracking_bin(BuildProfile::Release, "sigchld_sigstack", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_chained() {
test_crash_tracking_bin(BuildProfile::Release, "chained");
test_crash_tracking_bin(BuildProfile::Release, "chained", "null_deref");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_fork() {
test_crash_tracking_bin(BuildProfile::Release, "fork");
test_crash_tracking_bin(BuildProfile::Release, "fork", "null_deref");
}

fn test_crash_tracking_bin(crash_tracking_receiver_profile: BuildProfile, mode: &str) {
#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_abort() {
// For now, do the base test (donothing). For future we should probably also test chaining.
test_crash_tracking_bin(BuildProfile::Release, "donothing", "sigabrt");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigill() {
// For now, do the base test (donothing). For future we should probably also test chaining.
test_crash_tracking_bin(BuildProfile::Release, "donothing", "sigill");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigbus() {
// For now, do the base test (donothing). For future we should probably also test chaining.
test_crash_tracking_bin(BuildProfile::Release, "donothing", "sigbus");
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_sigsegv() {
// For now, do the base test (donothing). For future we should probably also test chaining.
test_crash_tracking_bin(BuildProfile::Release, "donothing", "sigsegv");
}

fn test_crash_tracking_bin(
crash_tracking_receiver_profile: BuildProfile,
mode: &str,
crash_typ: &str,
) {
let (crashtracker_bin, crashtracker_receiver) =
setup_crashtracking_crates(crash_tracking_receiver_profile);
let fixtures = setup_test_fixtures(&[&crashtracker_receiver, &crashtracker_bin]);
Expand All @@ -76,13 +109,23 @@ fn test_crash_tracking_bin(crash_tracking_receiver_profile: BuildProfile, mode:
.arg(fixtures.artifacts[&crashtracker_receiver].as_os_str())
.arg(&fixtures.output_dir)
.arg(mode)
.arg(crash_typ)
.spawn()
.unwrap();
let exit_status = bin_tests::timeit!("exit after signal", {
eprintln!("Waiting for exit");
p.wait().unwrap()
});
assert!(!exit_status.success());

// When we raise SIGSEGV/SIGBUS, the chained handler doesn't kill the program
// Presumably because continuing after raise is allowed.
// Not sure why sigill behaves differently??
// TODO: figure that out.
match crash_typ {
"null_deref" | "sigabrt" | "sigill" => assert!(!exit_status.success()),
"sigbus" | "sigsegv" => (),
_ => unreachable!("{crash_typ} shouldn't happen"),
}

let stderr_path = format!("{0}/out.stderr", fixtures.output_dir.display());
let stderr = fs::read(stderr_path)
Expand Down Expand Up @@ -120,32 +163,12 @@ fn test_crash_tracking_bin(crash_tracking_receiver_profile: BuildProfile, mode:
crash_payload["counters"],
);
let sig_info = &crash_payload["sig_info"];
// On every platform other than OSX ARM, the si_code is 1: SEGV_MAPERR
// On OSX ARM, its 2: SEGV_ACCERR
assert!(
*sig_info
== serde_json::json!({
"si_addr": "0x0000000000000000",
"si_code": 1,
"si_code_human_readable": "UNKNOWN",
"si_signo": 11,
"si_signo_human_readable": "SIGSEGV",
})
|| *sig_info
== serde_json::json!({
"si_addr": "0x0000000000000000",
"si_code": 2,
"si_code_human_readable": "UNKNOWN",
"si_signo": 11,
"si_signo_human_readable": "SIGSEGV",
}),
"Unexpected sig_info: {sig_info}"
);
assert_siginfo_message(sig_info, crash_typ);

let crash_telemetry = fs::read(fixtures.crash_telemetry_path)
.context("reading crashtracker telemetry payload")
.unwrap();
assert_telemetry_message(&crash_telemetry);
assert_telemetry_message(&crash_telemetry, crash_typ);

// Crashtracking signal handler chaining tests, as well as other tests, might only be able to
// influence system state after the main application has crashed, and has therefore lost the
Expand All @@ -161,7 +184,46 @@ fn test_crash_tracking_bin(crash_tracking_receiver_profile: BuildProfile, mode:
}
}

fn assert_telemetry_message(crash_telemetry: &[u8]) {
fn assert_siginfo_message(sig_info: &Value, crash_typ: &str) {
match crash_typ {
"sigabrt" => {
assert_eq!(sig_info["si_code_human_readable"], "UNKNOWN");
assert_eq!(sig_info["si_signo"], libc::SIGABRT);
assert_eq!(sig_info["si_signo_human_readable"], "SIGABRT");
}
"sigsegv" => {
assert_eq!(sig_info["si_code_human_readable"], "UNKNOWN");
assert_eq!(sig_info["si_signo"], libc::SIGSEGV);
assert_eq!(sig_info["si_signo_human_readable"], "SIGSEGV");
}
"sigbus" => {
assert_eq!(sig_info["si_code_human_readable"], "UNKNOWN");
assert_eq!(sig_info["si_signo"], libc::SIGBUS);
assert_eq!(sig_info["si_signo_human_readable"], "SIGBUS");
}
"sigill" => {
assert_eq!(sig_info["si_code_human_readable"], "UNKNOWN");
assert_eq!(sig_info["si_signo"], libc::SIGILL);
assert_eq!(sig_info["si_signo_human_readable"], "SIGILL");
}
"null_deref" =>
// On every platform other than OSX ARM, the si_code is 1: SEGV_MAPERR
// On OSX ARM, its 2: SEGV_ACCERR
{
assert_eq!(sig_info["si_addr"], "0x0000000000000000");
assert!(
sig_info["si_code"] == 2 || sig_info["si_code"] == 1,
"{sig_info:?}"
);
assert_eq!(sig_info["si_code_human_readable"], "UNKNOWN");
assert_eq!(sig_info["si_signo"], libc::SIGSEGV);
assert_eq!(sig_info["si_signo_human_readable"], "SIGSEGV");
}
_ => panic!("unexpected crash_typ {crash_typ}"),
}
}

fn assert_telemetry_message(crash_telemetry: &[u8], crash_typ: &str) {
let telemetry_payload: serde_json::Value =
serde_json::from_slice::<serde_json::Value>(crash_telemetry)
.context("deserializing crashtracker telemetry payload to json")
Expand All @@ -185,8 +247,8 @@ fn assert_telemetry_message(crash_telemetry: &[u8]) {
.split(',')
.filter(|t| !t.starts_with("uuid:"))
.collect::<std::collections::HashSet<_>>();
// As above, ARM OSX can have a si_code of 2.
assert!(

let base_expected_tags: std::collections::HashSet<&str> =
std::collections::HashSet::from_iter([
"data_schema_version:1.2",
"incomplete:false",
Expand All @@ -195,27 +257,51 @@ fn assert_telemetry_message(crash_telemetry: &[u8]) {
"profiler_inactive:0",
"profiler_serializing:0",
"profiler_unwinding:0",
"si_addr:0x0000000000000000",
"si_code_human_readable:UNKNOWN",
"si_code:1",
"si_signo_human_readable:SIGSEGV",
"si_signo:11",
]) == tags
|| std::collections::HashSet::from_iter([
"data_schema_version:1.2",
"incomplete:false",
"is_crash:true",
"profiler_collecting_sample:1",
"profiler_inactive:0",
"profiler_serializing:0",
"profiler_unwinding:0",
"si_addr:0x0000000000000000",
"si_code_human_readable:UNKNOWN",
"si_code:2",
"si_signo_human_readable:SIGSEGV",
"si_signo:11",
]) == tags
);
]);

match crash_typ {
"sigabrt" => {
assert!(base_expected_tags.is_subset(&tags), "{tags:?}");
assert!(tags.contains("si_code_human_readable:UNKNOWN"), "{tags:?}");
assert!(tags.contains("si_signo_human_readable:SIGABRT"), "{tags:?}");
assert!(tags.contains("si_signo:6"), "{tags:?}");
}
"sigbus" => {
assert!(base_expected_tags.is_subset(&tags), "{tags:?}");
assert!(tags.contains("si_code_human_readable:UNKNOWN"), "{tags:?}");
assert!(tags.contains("si_signo_human_readable:SIGBUS"), "{tags:?}");
// SIGBUS can be 7 or 10, depending on the os.
assert!(
tags.contains(format!("si_signo:{}", libc::SIGBUS).as_str()),
"{tags:?}"
);
}
"sigill" => {
assert!(base_expected_tags.is_subset(&tags), "{tags:?}");
assert!(tags.contains("si_code_human_readable:UNKNOWN"), "{tags:?}");
assert!(tags.contains("si_signo_human_readable:SIGILL"), "{tags:?}");
assert!(tags.contains("si_signo:4"), "{tags:?}");
}
"sigsegv" => {
assert!(base_expected_tags.is_subset(&tags), "{tags:?}");
assert!(tags.contains("si_code_human_readable:UNKNOWN"), "{tags:?}");
assert!(tags.contains("si_signo_human_readable:SIGSEGV"), "{tags:?}");
assert!(tags.contains("si_signo:11"), "{tags:?}");
}
"null_deref" => {
assert!(base_expected_tags.is_subset(&tags), "{tags:?}");
assert!(tags.contains("si_addr:0x0000000000000000"), "{tags:?}");
assert!(tags.contains("si_code_human_readable:UNKNOWN"), "{tags:?}");
assert!(tags.contains("si_signo_human_readable:SIGSEGV"), "{tags:?}");
assert!(tags.contains("si_signo:11"), "{tags:?}");
assert!(
tags.contains("si_code:1") || tags.contains("si_code:2"),
"{tags:?}"
);
}
_ => panic!("{crash_typ}"),
}

assert_eq!(telemetry_payload["payload"][0]["is_sensitive"], true);
}

Expand All @@ -238,6 +324,7 @@ fn crash_tracking_empty_endpoint() {
.arg(fixtures.artifacts[&crashtracker_receiver].as_os_str())
.arg(&fixtures.output_dir)
.arg("donothing")
.arg("null_deref")
.env(
"DD_TRACE_AGENT_URL",
format!("unix://{}", socket_path.display()),
Expand Down Expand Up @@ -286,7 +373,7 @@ fn crash_tracking_empty_endpoint() {
let resp = String::from_utf8_lossy(&out[..total_read]);
let pos = resp.find("\r\n\r\n").unwrap();
let body = &resp[pos + 4..];
assert_telemetry_message(body.as_bytes());
assert_telemetry_message(body.as_bytes(), "null_deref");
}

struct TestFixtures<'a> {
Expand Down
5 changes: 5 additions & 0 deletions crashtracker-ffi/src/collector/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ pub struct Config<'a> {
/// If None, the crashtracker will infer the agent host from env variables.
pub endpoint: Option<&'a Endpoint>,
pub resolve_frames: StacktraceCollection,
/// The set of signals we should be registered for.
/// If empty, use the default set.
pub signals: Slice<'a, i32>,
/// Timeout in milliseconds before the signal handler starts tearing things down to return.
/// This is given as a uint32_t, but the actual timeout needs to fit inside of an i32 (max
/// 2^31-1). This is a limitation of the various interfaces used to guarantee the timeout.
Expand All @@ -84,6 +87,7 @@ impl<'a> TryFrom<Config<'a>> for datadog_crashtracker::CrashtrackerConfiguration
let use_alt_stack = value.use_alt_stack;
let endpoint = value.endpoint.cloned();
let resolve_frames = value.resolve_frames;
let signals = value.signals.iter().copied().collect();
let timeout_ms = value.timeout_ms;
let unix_socket_path = value.optional_unix_socket_filename.try_to_string_option()?;
Self::new(
Expand All @@ -92,6 +96,7 @@ impl<'a> TryFrom<Config<'a>> for datadog_crashtracker::CrashtrackerConfiguration
use_alt_stack,
endpoint,
resolve_frames,
signals,
timeout_ms,
unix_socket_path,
)
Expand Down
Loading
Loading