From f06946492e2da7b0d2096256db3a6b8563f31a27 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 19 Sep 2024 09:56:58 +0200 Subject: [PATCH 1/7] wip --- relay-event-schema/src/protocol/span.rs | 78 ++++++++++++++++++- relay-protocol/src/macros.rs | 21 +++++ .../src/services/processor/span/processing.rs | 25 +++++- 3 files changed, 117 insertions(+), 7 deletions(-) diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index 10bc53fb7d..d536928bc5 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -1,10 +1,12 @@ mod convert; -use relay_protocol::{Annotated, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value}; +use relay_protocol::{ + Annotated, Array, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value, +}; use crate::processor::ProcessValue; use crate::protocol::{ - EventId, JsonLenientString, LenientString, Measurements, MetricsSummary, OperationType, + EventId, IpAddr, JsonLenientString, LenientString, Measurements, MetricsSummary, OperationType, OriginType, SpanId, SpanStatus, ThreadId, Timestamp, TraceId, }; @@ -352,6 +354,66 @@ pub struct SpanData { #[metastructure(field = "user")] pub user: Annotated, + /// User email address. + /// + /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + #[metastructure(field = "user.email")] + pub user_email: Annotated, + + /// User’s full name. + /// + /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + #[metastructure(field = "user.full_name")] + pub user_full_name: Annotated, + + /// Two-letter country code (ISO 3166-1 alpha-2). + /// + /// This is not an OTel convention (yet). + #[metastructure(field = "user.geo.country_code")] + pub user_geo_country_code: Annotated, + + /// Human readable city name. + /// + /// This is not an OTel convention (yet). + #[metastructure(field = "user.geo.city")] + pub user_geo_city: Annotated, + + /// Human readable subdivision name. + /// + /// This is not an OTel convention (yet). + #[metastructure(field = "user.geo.subdivision")] + pub user_geo_subdivision: Annotated, + + /// Human readable region name or code. + /// + /// This is not an OTel convention (yet). + #[metastructure(field = "user.geo.region")] + pub user_geo_region: Annotated, + + /// Unique user hash to correlate information for a user in anonymized form. + /// + /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + #[metastructure(field = "user.hash")] + pub user_hash: Annotated, + + /// Unique identifier of the user. + /// + /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + #[metastructure(field = "user.id")] + pub user_id: Annotated, + + /// Short name or login/username of the user. + /// + /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + #[metastructure(field = "user.name")] + pub user_name: Annotated, + + /// Array of user roles at the time of the event. + /// + /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + #[metastructure(field = "user.roles")] + pub user_roles: Array, + /// Replay ID #[metastructure(field = "sentry.replay.id", legacy_alias = "replay_id")] pub replay_id: Annotated, @@ -410,7 +472,7 @@ pub struct SpanData { /// The client's IP address. #[metastructure(field = "client.address")] - pub client_address: Annotated, + pub client_address: Annotated, /// The current route in the application. /// @@ -463,6 +525,16 @@ impl Getter for SpanData { "thread\\.name" => self.thread_name.as_str()?.into(), "ui\\.component_name" => self.ui_component_name.value()?.into(), "url\\.scheme" => self.url_scheme.value()?.into(), + "user" => self.user.value()?.into(), + "user\\.email" => self.user_email.as_str()?.into(), + "user\\.full_name" => self.user_full_name.as_str()?.into(), + "user\\.geo\\.city" => self.user_geo_city.as_str()?.into(), + "user\\.geo\\.country_code" => self.user_geo_country_code.as_str()?.into(), + "user\\.geo\\.region" => self.user_geo_region.as_str()?.into(), + "user\\.geo\\.subdivision" => self.user_geo_subdivision.as_str()?.into(), + "user\\.hash" => self.user_hash.as_str()?.into(), + "user\\.id" => self.user_id.as_str()?.into(), + "user\\.name" => self.user_name.as_str()?.into(), "transaction" => self.segment_name.as_str()?.into(), "release" => self.release.as_str()?.into(), _ => { diff --git a/relay-protocol/src/macros.rs b/relay-protocol/src/macros.rs index 381b664c88..48594a0045 100644 --- a/relay-protocol/src/macros.rs +++ b/relay-protocol/src/macros.rs @@ -146,6 +146,27 @@ macro_rules! get_value { }}; } +#[macro_export] +macro_rules! get_value_mut { + (@access $root:ident,) => {}; + (@access $root:ident, !) => { + let $root = $root.unwrap(); + }; + (@access $root:ident, . $field:ident $( $tail:tt )*) => { + let $root = $root.and_then(|v| v.$field.value_mut().as_mut()); + get_value_mut!(@access $root, $($tail)*); + }; + (@access $root:ident, [ $index:literal ] $( $tail:tt )*) => { + let $root = $root.and_then(|v| v.get($index)).and_then(|a| a.value_mut()); + get_value!(@access $root, $($tail)*); + }; + ($root:ident $( $tail:tt )*) => {{ + let $root = $root.value_mut(); + $crate::get_value_mut!(@access $root, $($tail)*); + $root + }}; +} + /// Derives the [`FromValue`], [`IntoValue`], and [`Empty`] traits using the string representation. /// /// Requires that this type implements `FromStr` and `Display`. Implements the following traits: diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 91044f137e..4936b876aa 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -24,7 +24,7 @@ use relay_event_schema::protocol::{BrowserContext, Span, SpanData}; use relay_log::protocol::{Attachment, AttachmentType}; use relay_metrics::{MetricNamespace, UnixTimestamp}; use relay_pii::PiiProcessor; -use relay_protocol::{Annotated, Empty}; +use relay_protocol::{get_path, get_value, get_value_mut, Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::otel_trace::Span as OtelSpan; use thiserror::Error; @@ -454,9 +454,7 @@ fn normalize( set_segment_attributes(annotated_span); - // This follows the steps of `NormalizeProcessor::process_event`. - // Ideally, `NormalizeProcessor` would execute these steps generically, i.e. also when calling - // `process` on it. + // This follows the steps of `event::normalize`. process_value( annotated_span, @@ -483,6 +481,25 @@ fn normalize( return Err(ProcessingError::NoEventPayload); }; + if let Some(data) = span.data.value_mut() { + // Replace {{auto}} IPs: + if let Some(client_ip) = config.client_ip { + if let Some(ip) = get_value_mut!(data.client_address) { + // TODO: is {{ auto }} set automatically for events? + if ip.is_auto() { + config.client_ip.clone_into(ip); + } + } + } + + // Derive geo ip: + if let Some(ip) = get_value!(annotated_span.data.client_address) { + if let Ok(Some(geo)) = geoip_lookup.lookup(ip.as_str()) { + data.user_geo = geo; + } + } + } + populate_ua_fields(span, user_agent.as_deref(), client_hints.as_deref()); if let Annotated(Some(ref mut measurement_values), ref mut meta) = span.measurements { From 8c26dc8f20d9fec39dd8d1cceff3a55489ce378c Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 19 Sep 2024 11:19:46 +0200 Subject: [PATCH 2/7] wip --- relay-event-schema/src/protocol/span.rs | 2 +- relay-protocol/src/macros.rs | 21 --- relay-server/src/services/processor.rs | 2 +- .../src/services/processor/span/processing.rs | 123 ++++++++++++++++-- 4 files changed, 113 insertions(+), 35 deletions(-) diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index d536928bc5..d10d9dff72 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -412,7 +412,7 @@ pub struct SpanData { /// /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ #[metastructure(field = "user.roles")] - pub user_roles: Array, + pub user_roles: Annotated>, /// Replay ID #[metastructure(field = "sentry.replay.id", legacy_alias = "replay_id")] diff --git a/relay-protocol/src/macros.rs b/relay-protocol/src/macros.rs index 48594a0045..381b664c88 100644 --- a/relay-protocol/src/macros.rs +++ b/relay-protocol/src/macros.rs @@ -146,27 +146,6 @@ macro_rules! get_value { }}; } -#[macro_export] -macro_rules! get_value_mut { - (@access $root:ident,) => {}; - (@access $root:ident, !) => { - let $root = $root.unwrap(); - }; - (@access $root:ident, . $field:ident $( $tail:tt )*) => { - let $root = $root.and_then(|v| v.$field.value_mut().as_mut()); - get_value_mut!(@access $root, $($tail)*); - }; - (@access $root:ident, [ $index:literal ] $( $tail:tt )*) => { - let $root = $root.and_then(|v| v.get($index)).and_then(|a| a.value_mut()); - get_value!(@access $root, $($tail)*); - }; - ($root:ident $( $tail:tt )*) => {{ - let $root = $root.value_mut(); - $crate::get_value_mut!(@access $root, $($tail)*); - $root - }}; -} - /// Derives the [`FromValue`], [`IntoValue`], and [`Empty`] traits using the string representation. /// /// Requires that this type implements `FromStr` and `Display`. Implements the following traits: diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 1c9261b44c..8fc647bf69 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1866,7 +1866,7 @@ impl EnvelopeProcessorService { if_processing!(self.inner.config, { let global_config = self.inner.global_config.current(); - span::process(state, &global_config); + span::process(state, &global_config, self.inner.geoip_lookup.as_ref()); self.enforce_quotas(state)?; }); diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 4936b876aa..be67438e0f 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -16,15 +16,15 @@ use relay_event_normalization::{ TransactionsProcessor, }; use relay_event_normalization::{ - normalize_transaction_name, ClientHints, FromUserAgentInfo, ModelCosts, SchemaProcessor, - TimestampProcessor, TransactionNameRule, TrimmingProcessor, + normalize_transaction_name, ClientHints, FromUserAgentInfo, GeoIpLookup, ModelCosts, + SchemaProcessor, TimestampProcessor, TransactionNameRule, TrimmingProcessor, }; use relay_event_schema::processor::{process_value, ProcessingState}; -use relay_event_schema::protocol::{BrowserContext, Span, SpanData}; +use relay_event_schema::protocol::{BrowserContext, IpAddr, Span, SpanData}; use relay_log::protocol::{Attachment, AttachmentType}; use relay_metrics::{MetricNamespace, UnixTimestamp}; use relay_pii::PiiProcessor; -use relay_protocol::{get_path, get_value, get_value_mut, Annotated, Empty}; +use relay_protocol::{Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::otel_trace::Span as OtelSpan; use thiserror::Error; @@ -42,7 +42,11 @@ use crate::utils::{sample, ItemAction, ManagedEnvelope}; #[error(transparent)] struct ValidationError(#[from] anyhow::Error); -pub fn process(state: &mut ProcessEnvelopeState, global_config: &GlobalConfig) { +pub fn process( + state: &mut ProcessEnvelopeState, + global_config: &GlobalConfig, + geo_lookup: Option<&GeoIpLookup>, +) { use relay_event_normalization::RemoveOtherProcessor; // We only implement trace-based sampling rules for now, which can be computed @@ -58,6 +62,8 @@ pub fn process(state: &mut ProcessEnvelopeState, global_config: &Glob global_config, state.project_state.config(), &state.managed_envelope, + state.envelope().meta().client_addr().map(IpAddr::from), + geo_lookup, ); let client_ip = state.managed_envelope.envelope().meta().client_addr(); @@ -352,6 +358,13 @@ struct NormalizeSpanConfig<'a> { allowed_hosts: &'a [String], /// Whether or not to scrub MongoDB span descriptions during normalization. scrub_mongo_description: ScrubMongoDescription, + /// The IP address of the SDK that sent the event. + /// + /// When `{{auto}}` is specified and there is no other IP address in the payload, such as in the + /// `request` context, this IP address gets added to `span.data.client_address`. + client_ip: Option, + /// An initialized GeoIP lookup. + geo_lookup: Option<&'a GeoIpLookup>, } impl<'a> NormalizeSpanConfig<'a> { @@ -360,6 +373,8 @@ impl<'a> NormalizeSpanConfig<'a> { global_config: &'a GlobalConfig, project_config: &'a ProjectConfig, managed_envelope: &ManagedEnvelope, + client_ip: Option, + geo_lookup: Option<&'a GeoIpLookup>, ) -> Self { let aggregator_config = config.aggregator_config_for(MetricNamespace::Spans); @@ -397,6 +412,8 @@ impl<'a> NormalizeSpanConfig<'a> { } else { ScrubMongoDescription::Disabled }, + client_ip, + geo_lookup, } } } @@ -450,6 +467,8 @@ fn normalize( client_hints, allowed_hosts, scrub_mongo_description, + client_ip, + geo_lookup, } = config; set_segment_attributes(annotated_span); @@ -483,19 +502,23 @@ fn normalize( if let Some(data) = span.data.value_mut() { // Replace {{auto}} IPs: - if let Some(client_ip) = config.client_ip { - if let Some(ip) = get_value_mut!(data.client_address) { - // TODO: is {{ auto }} set automatically for events? + if let Some(client_ip) = client_ip.as_ref() { + if let Some(ip) = data.client_address.value_mut().as_mut() { if ip.is_auto() { - config.client_ip.clone_into(ip); + *ip = client_ip.clone(); } } } // Derive geo ip: - if let Some(ip) = get_value!(annotated_span.data.client_address) { - if let Ok(Some(geo)) = geoip_lookup.lookup(ip.as_str()) { - data.user_geo = geo; + if let Some(geoip_lookup) = geo_lookup { + if let Some(ip) = data.client_address.value() { + if let Ok(Some(geo)) = geoip_lookup.lookup(ip.as_str()) { + data.user_geo_city = geo.city; + data.user_geo_country_code = geo.country_code; + data.user_geo_region = geo.region; + data.user_geo_subdivision = geo.subdivision; + } } } } @@ -681,6 +704,7 @@ mod tests { use std::sync::Arc; use bytes::Bytes; + use insta::assert_debug_snapshot; use relay_base_schema::project::ProjectId; use relay_event_schema::protocol::{ Context, ContextInner, SpanId, Timestamp, TraceContext, TraceId, @@ -1041,4 +1065,79 @@ mod tests { assert_eq!(get_value!(span.data.user_agent_original), None); assert_eq!(get_value!(span.data.browser_name!), "Opera"); } + + #[test] + fn user_ip_from_client_ip_without_auto() { + let mut span = Annotated::from_json( + r#"{ + "start_timestamp": 0, + "timestamp": 1, + "trace_id": "1", + "span_id": "1", + "data": { + "client_address": "2.125.160.216" + } + }"#, + ) + .unwrap(); + + let geo_lookup = GeoIpLookup::open( + "../relay-event-normalization/tests/fixtures/GeoIP2-Enterprise-Test.mmdb", + ) + .unwrap(); + + normalize( + &mut span, + NormalizeSpanConfig { + received_at: DateTime::from_timestamp_nanos(0), + timestamp_range: UnixTimestamp::from_datetime(DateTime::::MIN_UTC).unwrap() + ..UnixTimestamp::from_datetime(DateTime::::MAX_UTC).unwrap(), + max_tag_value_size: 200, + performance_score: None, + measurements: None, + ai_model_costs: None, + max_name_and_unit_len: 200, + tx_name_rules: &[], + user_agent: None, + client_hints: ClientHints::default(), + allowed_hosts: &[], + scrub_mongo_description: ScrubMongoDescription::Disabled, + client_ip: None, + geo_lookup: Some(&geo_lookup), + }, + ) + .unwrap(); + + assert_debug_snapshot!(span, @""); + } + + // #[test] + // fn user_ip_from_client_ip_with_auto() { + // let span = Annotated::from_json( + // r#"{ + // "data": { + // "client_address": "{{auto}}", + // } + // }"#, + // ) + // .unwrap(); + + // let ip_address = IpAddr::parse("2.125.160.216").unwrap(); + + // let geo = GeoIpLookup::open("tests/fixtures/GeoIP2-Enterprise-Test.mmdb").unwrap(); + // normalize_event( + // &mut event, + // &NormalizationConfig { + // client_ip: Some(&ip_address), + // geoip_lookup: Some(&geo), + // ..Default::default() + // }, + // ); + + // let user = get_value!(event.user!); + // let ip_addr = user.ip_address.value().expect("ip address missing"); + + // assert_eq!(ip_addr, &IpAddr("2.125.160.216".to_string())); + // assert!(user.geo.value().is_some()); + // } } From db7c14a2bceca9d6c2574b048803389c76d6ae4c Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 19 Sep 2024 14:59:10 +0200 Subject: [PATCH 3/7] test --- .../src/services/processor/span/processing.rs | 109 ++++++++---------- 1 file changed, 51 insertions(+), 58 deletions(-) diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index be67438e0f..5283a290b8 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -704,7 +704,7 @@ mod tests { use std::sync::Arc; use bytes::Bytes; - use insta::assert_debug_snapshot; + use once_cell::sync::Lazy; use relay_base_schema::project::ProjectId; use relay_event_schema::protocol::{ Context, ContextInner, SpanId, Timestamp, TraceContext, TraceId, @@ -1066,78 +1066,71 @@ mod tests { assert_eq!(get_value!(span.data.browser_name!), "Opera"); } + static GEO_LOOKUP: Lazy = Lazy::new(|| { + GeoIpLookup::open("../relay-event-normalization/tests/fixtures/GeoIP2-Enterprise-Test.mmdb") + .unwrap() + }); + + fn normalize_config() -> NormalizeSpanConfig<'static> { + NormalizeSpanConfig { + received_at: DateTime::from_timestamp_nanos(0), + timestamp_range: UnixTimestamp::from_datetime( + DateTime::::from_timestamp_millis(1000).unwrap(), + ) + .unwrap() + ..UnixTimestamp::from_datetime(DateTime::::MAX_UTC).unwrap(), + max_tag_value_size: 200, + performance_score: None, + measurements: None, + ai_model_costs: None, + max_name_and_unit_len: 200, + tx_name_rules: &[], + user_agent: None, + client_hints: ClientHints::default(), + allowed_hosts: &[], + scrub_mongo_description: ScrubMongoDescription::Disabled, + client_ip: Some(IpAddr("2.125.160.216".to_owned())), + geo_lookup: Some(&GEO_LOOKUP), + } + } + #[test] fn user_ip_from_client_ip_without_auto() { let mut span = Annotated::from_json( r#"{ "start_timestamp": 0, "timestamp": 1, - "trace_id": "1", - "span_id": "1", + "trace_id": "922dda2462ea4ac2b6a4b339bee90863", + "span_id": "922dda2462ea4ac2", "data": { - "client_address": "2.125.160.216" + "client.address": "2.125.160.216" } }"#, ) .unwrap(); - let geo_lookup = GeoIpLookup::open( - "../relay-event-normalization/tests/fixtures/GeoIP2-Enterprise-Test.mmdb", - ) - .unwrap(); + normalize(&mut span, normalize_config()).unwrap(); - normalize( - &mut span, - NormalizeSpanConfig { - received_at: DateTime::from_timestamp_nanos(0), - timestamp_range: UnixTimestamp::from_datetime(DateTime::::MIN_UTC).unwrap() - ..UnixTimestamp::from_datetime(DateTime::::MAX_UTC).unwrap(), - max_tag_value_size: 200, - performance_score: None, - measurements: None, - ai_model_costs: None, - max_name_and_unit_len: 200, - tx_name_rules: &[], - user_agent: None, - client_hints: ClientHints::default(), - allowed_hosts: &[], - scrub_mongo_description: ScrubMongoDescription::Disabled, - client_ip: None, - geo_lookup: Some(&geo_lookup), - }, + assert_eq!(get_value!(span.data.user_geo_city!), "Boxford"); + } + + #[test] + fn user_ip_from_client_ip_with_auto() { + let mut span = Annotated::from_json( + r#"{ + "start_timestamp": 0, + "timestamp": 1, + "trace_id": "922dda2462ea4ac2b6a4b339bee90863", + "span_id": "922dda2462ea4ac2", + "data": { + "client.address": "{{auto}}" + } + }"#, ) .unwrap(); - assert_debug_snapshot!(span, @""); - } + normalize(&mut span, normalize_config()).unwrap(); - // #[test] - // fn user_ip_from_client_ip_with_auto() { - // let span = Annotated::from_json( - // r#"{ - // "data": { - // "client_address": "{{auto}}", - // } - // }"#, - // ) - // .unwrap(); - - // let ip_address = IpAddr::parse("2.125.160.216").unwrap(); - - // let geo = GeoIpLookup::open("tests/fixtures/GeoIP2-Enterprise-Test.mmdb").unwrap(); - // normalize_event( - // &mut event, - // &NormalizationConfig { - // client_ip: Some(&ip_address), - // geoip_lookup: Some(&geo), - // ..Default::default() - // }, - // ); - - // let user = get_value!(event.user!); - // let ip_addr = user.ip_address.value().expect("ip address missing"); - - // assert_eq!(ip_addr, &IpAddr("2.125.160.216".to_string())); - // assert!(user.geo.value().is_some()); - // } + assert_eq!(get_value!(span.data.user_geo_city!), "Boxford"); + } } From 9beb628465295ee7fe078223401e0ebacd6d98f6 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 19 Sep 2024 15:15:40 +0200 Subject: [PATCH 4/7] doc --- relay-event-schema/src/protocol/span.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index d10d9dff72..1424073612 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -356,13 +356,13 @@ pub struct SpanData { /// User email address. /// - /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + /// #[metastructure(field = "user.email")] pub user_email: Annotated, /// User’s full name. /// - /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + /// #[metastructure(field = "user.full_name")] pub user_full_name: Annotated, @@ -392,25 +392,25 @@ pub struct SpanData { /// Unique user hash to correlate information for a user in anonymized form. /// - /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + /// #[metastructure(field = "user.hash")] pub user_hash: Annotated, /// Unique identifier of the user. /// - /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + /// #[metastructure(field = "user.id")] pub user_id: Annotated, /// Short name or login/username of the user. /// - /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + /// #[metastructure(field = "user.name")] pub user_name: Annotated, /// Array of user roles at the time of the event. /// - /// https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/ + /// #[metastructure(field = "user.roles")] pub user_roles: Annotated>, From 12ff83bc023774cfea69f0e66ef0e21cf08ee257 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 19 Sep 2024 15:47:22 +0200 Subject: [PATCH 5/7] snapshots --- relay-event-schema/src/protocol/span.rs | 14 +++++- .../src/protocol/span/convert.rs | 10 ++++ ...t__tests__extract_span_metrics_mobile.snap | 50 +++++++++++++++++++ relay-spans/src/span.rs | 10 ++++ 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index 1424073612..119b377500 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -849,6 +849,16 @@ mod tests { ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: ~, sdk_version: ~, @@ -877,7 +887,9 @@ mod tests { messaging_message_id: "abc123", user_agent_original: "Chrome", url_full: "my_url.com", - client_address: "192.168.0.1", + client_address: IpAddr( + "192.168.0.1", + ), route: ~, previous_route: ~, other: { diff --git a/relay-event-schema/src/protocol/span/convert.rs b/relay-event-schema/src/protocol/span/convert.rs index eaef3e8a3c..892be0d4e8 100644 --- a/relay-event-schema/src/protocol/span/convert.rs +++ b/relay-event-schema/src/protocol/span/convert.rs @@ -193,6 +193,16 @@ mod tests { ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: "sentry.php", sdk_version: "1.2.3", diff --git a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap index 5cc31d7c05..b05de03984 100644 --- a/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap +++ b/relay-server/src/metrics_extraction/snapshots/relay_server__metrics_extraction__event__tests__extract_span_metrics_mobile.snap @@ -114,6 +114,16 @@ expression: "(&event.value().unwrap().spans, metrics)" ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: ~, sdk_version: ~, @@ -460,6 +470,16 @@ expression: "(&event.value().unwrap().spans, metrics)" ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: ~, sdk_version: ~, @@ -568,6 +588,16 @@ expression: "(&event.value().unwrap().spans, metrics)" ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: ~, sdk_version: ~, @@ -728,6 +758,16 @@ expression: "(&event.value().unwrap().spans, metrics)" ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: ~, sdk_version: ~, @@ -836,6 +876,16 @@ expression: "(&event.value().unwrap().spans, metrics)" ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: ~, sdk_version: ~, diff --git a/relay-spans/src/span.rs b/relay-spans/src/span.rs index f8ac4d05f1..93150cbe21 100644 --- a/relay-spans/src/span.rs +++ b/relay-spans/src/span.rs @@ -660,6 +660,16 @@ mod tests { ui_component_name: ~, url_scheme: ~, user: ~, + user_email: ~, + user_full_name: ~, + user_geo_country_code: ~, + user_geo_city: ~, + user_geo_subdivision: ~, + user_geo_region: ~, + user_hash: ~, + user_id: ~, + user_name: ~, + user_roles: ~, replay_id: ~, sdk_name: "sentry.php", sdk_version: ~, From 22812df68958489a58e668a4ecc972697264f071 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 19 Sep 2024 17:04:48 +0200 Subject: [PATCH 6/7] Treat missing IP address as {{auto}} --- .../src/services/processor/span/processing.rs | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 5283a290b8..00b1d9858b 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -500,25 +500,27 @@ fn normalize( return Err(ProcessingError::NoEventPayload); }; - if let Some(data) = span.data.value_mut() { - // Replace {{auto}} IPs: - if let Some(client_ip) = client_ip.as_ref() { - if let Some(ip) = data.client_address.value_mut().as_mut() { - if ip.is_auto() { - *ip = client_ip.clone(); - } - } + // Replace missing / {{auto}} IPs: + // Transaction and error events require an explicit `{{auto}}` to derive the IP, but + // for spans we derive it by default: + if let Some(client_ip) = client_ip.as_ref() { + let ip = span.data.value().and_then(|d| d.client_address.value()); + if ip.map_or(true, |ip| ip.is_auto()) { + span.data + .get_or_insert_with(Default::default) + .client_address = Annotated::new(client_ip.clone()); } + } - // Derive geo ip: - if let Some(geoip_lookup) = geo_lookup { - if let Some(ip) = data.client_address.value() { - if let Ok(Some(geo)) = geoip_lookup.lookup(ip.as_str()) { - data.user_geo_city = geo.city; - data.user_geo_country_code = geo.country_code; - data.user_geo_region = geo.region; - data.user_geo_subdivision = geo.subdivision; - } + // Derive geo ip: + if let Some(geoip_lookup) = geo_lookup { + let data = span.data.get_or_insert_with(Default::default); + if let Some(ip) = data.client_address.value() { + if let Ok(Some(geo)) = geoip_lookup.lookup(ip.as_str()) { + data.user_geo_city = geo.city; + data.user_geo_country_code = geo.country_code; + data.user_geo_region = geo.region; + data.user_geo_subdivision = geo.subdivision; } } } @@ -1111,6 +1113,10 @@ mod tests { normalize(&mut span, normalize_config()).unwrap(); + assert_eq!( + get_value!(span.data.client_address!).as_str(), + "2.125.160.216" + ); assert_eq!(get_value!(span.data.user_geo_city!), "Boxford"); } @@ -1131,6 +1137,31 @@ mod tests { normalize(&mut span, normalize_config()).unwrap(); + assert_eq!( + get_value!(span.data.client_address!).as_str(), + "2.125.160.216" + ); + assert_eq!(get_value!(span.data.user_geo_city!), "Boxford"); + } + + #[test] + fn user_ip_from_client_ip_with_missing() { + let mut span = Annotated::from_json( + r#"{ + "start_timestamp": 0, + "timestamp": 1, + "trace_id": "922dda2462ea4ac2b6a4b339bee90863", + "span_id": "922dda2462ea4ac2" + }"#, + ) + .unwrap(); + + normalize(&mut span, normalize_config()).unwrap(); + + assert_eq!( + get_value!(span.data.client_address!).as_str(), + "2.125.160.216" + ); assert_eq!(get_value!(span.data.user_geo_city!), "Boxford"); } } From d333c7e1eb388cd73a9b474ec31b69a996301923 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 20 Sep 2024 08:13:40 +0200 Subject: [PATCH 7/7] test, changelog --- CHANGELOG.md | 1 + tests/integration/test_spans.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93b80f284a..01d6515363 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ **Features:** - Add a config option to add default tags to all Relay Sentry events. ([#3944](https://github.com/getsentry/relay/pull/3944)) +- Automatically derive `client.address` and `user.geo` for standalone spans. ([#4047](https://github.com/getsentry/relay/pull/4047)) ## 24.9.0 diff --git a/tests/integration/test_spans.py b/tests/integration/test_spans.py index 7b0c685a4e..bd883b5ed8 100644 --- a/tests/integration/test_spans.py +++ b/tests/integration/test_spans.py @@ -871,6 +871,7 @@ def test_span_ingestion_with_performance_scores( { "data": { "browser.name": "Python Requests", + "client.address": "127.0.0.1", "user_agent.original": "python-requests/2.32.2", }, "duration_ms": 1500, @@ -910,6 +911,7 @@ def test_span_ingestion_with_performance_scores( { "data": { "browser.name": "Python Requests", + "client.address": "127.0.0.1", "sentry.replay.id": "8477286c8e5148b386b71ade38374d58", "sentry.segment.name": "/page/with/click/interaction/*/*", "user": "admin@sentry.io",