From ea5c5075dc6e16df5b7e52754979d6e5956801c2 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 22 Feb 2024 17:52:06 +0100 Subject: [PATCH 1/3] wip --- .../src/normalize/span/description/mod.rs | 21 +++++++++- .../src/normalize/span/tag_extraction.rs | 40 +++++++++++++++++++ relay-event-normalization/src/regexes.rs | 14 +++++++ 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 2aa6bbc8fe..669623a7f7 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -13,7 +13,8 @@ use relay_event_schema::protocol::Span; use url::Url; use crate::regexes::{ - DB_SQL_TRANSACTION_CORE_DATA_REGEX, REDIS_COMMAND_REGEX, RESOURCE_NORMALIZER_REGEX, + DB_SQL_TRANSACTION_CORE_DATA_REGEX, DB_SUPABASE_REGEX, REDIS_COMMAND_REGEX, + RESOURCE_NORMALIZER_REGEX, }; use crate::span::description::resource::COMMON_PATH_SEGMENTS; use crate::span::tag_extraction::HTTP_METHOD_EXTRACTOR_REGEX; @@ -70,12 +71,14 @@ pub(crate) fn scrub_span_description( // The description will only contain the entity queried and // the query type ("User find" for example). 
Some(description.to_owned()) + } else if span_origin == Some("auto.db.supabase") { + scrub_supabase(description) } else { let (scrubbed, mode) = sql::scrub_queries(db_system, description); if let sql::Mode::Parsed(ast) = mode { parsed_sql = Some(ast); } - scrubbed + dbg!(scrubbed) } } ("resource", ty) => scrub_resource(ty, description), @@ -141,6 +144,13 @@ fn scrub_core_data(string: &str) -> Option { } } +fn scrub_supabase(string: &str) -> Option { + match DB_SUPABASE_REGEX.replace_all(string, "{%s}") { + Cow::Owned(scrubbed) => Some(scrubbed), + Cow::Borrowed(_) => None, + } +} + fn scrub_http(string: &str) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { @@ -912,6 +922,13 @@ mod tests { span_description_test!(db_prisma, "User find", "db.sql.prisma", "User find"); + span_description_test!( + db_supabase, + "from(my_table)", + "db.auto.supabase", + "from(my_table)" + ); + #[test] fn informed_sql_parser() { let json = r#" diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index a3c99289f4..bc6d9b43e3 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -365,6 +365,12 @@ pub fn extract_tags( } else { None } + } else if span.origin.as_str() == Some("auto.db.supabase") { + scrubbed_description.as_deref().map(|s| { + s.trim_start_matches("from(") + .trim_end_matches(')') + .to_owned() + }) } else if span_op.starts_with("db") { span.description .value() @@ -1428,4 +1434,38 @@ LIMIT 1 Some(&"Chrome".to_string()) ); } + + #[test] + fn supabase() { + let json = r#"{ + "description": "from(my_table00)", + "op": "db.select", + "origin": "auto.db.supabase" + }"#; + + let span = Annotated::::from_json(json) + .unwrap() + .into_value() + .unwrap(); + + let tags = extract_tags( + &span, + &Config { + max_tag_value_size: 200, + }, + None, + 
None, + false, + None, + ); + + assert_eq!( + tags.get(&SpanTagKey::Description).map(String::as_str), + Some("from(my_table{%s})") + ); + assert_eq!( + tags.get(&SpanTagKey::Domain).map(String::as_str), + Some("my_table{%s}") + ); + } } diff --git a/relay-event-normalization/src/regexes.rs b/relay-event-normalization/src/regexes.rs index 76d5439f76..b273f1ef07 100644 --- a/relay-event-normalization/src/regexes.rs +++ b/relay-event-normalization/src/regexes.rs @@ -81,3 +81,17 @@ pub static RESOURCE_NORMALIZER_REGEX: Lazy = Lazy::new(|| { pub static DB_SQL_TRANSACTION_CORE_DATA_REGEX: Lazy = Lazy::new(|| Regex::new(r"(?P\d+)").unwrap()); + +pub static DB_SUPABASE_REGEX: Lazy = Lazy::new(|| { + Regex::new( + r"(?xi) + # UUIDs. + (?P[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) | + # Hexadecimal strings with more than 5 digits. + (?P[a-f0-9]{5}[a-f0-9]+) | + # Integer IDs with more than one digit. + (?P\d\d+) + ", + ) + .unwrap() +}); From 82fa157d76fc36fc0942b6a22cacb3b881d52e22 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 22 Feb 2024 17:54:41 +0100 Subject: [PATCH 2/3] rm dbg --- relay-event-normalization/src/normalize/span/description/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 669623a7f7..3b6ec7051f 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -78,7 +78,7 @@ pub(crate) fn scrub_span_description( if let sql::Mode::Parsed(ast) = mode { parsed_sql = Some(ast); } - dbg!(scrubbed) + scrubbed } } ("resource", ty) => scrub_resource(ty, description), From 745b948dd253e6b7e37853c6930c748d5e649539 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 23 Feb 2024 09:48:35 +0100 Subject: [PATCH 3/3] fix & doc --- CHANGELOG.md | 1 + .../src/normalize/span/description/mod.rs | 10 
+++------- .../src/normalize/span/tag_extraction.rs | 8 +++++++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d57f876328..0d62c35a7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Extend GPU context with data for Unreal Engine crash reports. ([#3144](https://github.com/getsentry/relay/pull/3144)) - Parametrize transaction in dynamic sampling context. ([#3141](https://github.com/getsentry/relay/pull/3141)) +- Parse & scrub span description for supabase. ([#3153](https://github.com/getsentry/relay/pull/3153)) **Bug Fixes**: diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 3b6ec7051f..da659dc564 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -72,6 +72,9 @@ pub(crate) fn scrub_span_description( // the query type ("User find" for example). Some(description.to_owned()) } else if span_origin == Some("auto.db.supabase") { + // The description only contains the table name, e.g. `from(users)`. + // In the future, we might want to parse `data.query` as well. 
+ // See https://github.com/supabase-community/sentry-integration-js/blob/master/index.js#L259 scrub_supabase(description) } else { let (scrubbed, mode) = sql::scrub_queries(db_system, description); @@ -922,13 +925,6 @@ mod tests { span_description_test!(db_prisma, "User find", "db.sql.prisma", "User find"); - span_description_test!( - db_supabase, - "from(my_table)", - "db.auto.supabase", - "from(my_table)" - ); - #[test] fn informed_sql_parser() { let json = r#" diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index bc6d9b43e3..44a78e7325 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -1440,7 +1440,13 @@ LIMIT 1 let json = r#"{ "description": "from(my_table00)", "op": "db.select", - "origin": "auto.db.supabase" + "origin": "auto.db.supabase", + "data": { + "query": [ + "select(*,other(*))", + "in(something, (value1,value2))" + ] + } }"#; let span = Annotated::::from_json(json)