Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(spans): Derive geo info for standalone spans #4047

Merged
merged 8 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 75 additions & 3 deletions relay-event-schema/src/protocol/span.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
mod convert;

use relay_protocol::{Annotated, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value};
use relay_protocol::{
Annotated, Array, Empty, Error, FromValue, Getter, IntoValue, Object, Val, Value,
};

use crate::processor::ProcessValue;
use crate::protocol::{
EventId, JsonLenientString, LenientString, Measurements, MetricsSummary, OperationType,
EventId, IpAddr, JsonLenientString, LenientString, Measurements, MetricsSummary, OperationType,
OriginType, SpanId, SpanStatus, ThreadId, Timestamp, TraceId,
};

Expand Down Expand Up @@ -352,6 +354,66 @@ pub struct SpanData {
#[metastructure(field = "user")]
pub user: Annotated<Value>,

/// User email address.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.email")]
pub user_email: Annotated<String>,

/// User’s full name.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.full_name")]
pub user_full_name: Annotated<String>,

/// Two-letter country code (ISO 3166-1 alpha-2).
///
/// This is not an OTel convention (yet).
#[metastructure(field = "user.geo.country_code")]
pub user_geo_country_code: Annotated<String>,

/// Human readable city name.
///
/// This is not an OTel convention (yet).
#[metastructure(field = "user.geo.city")]
pub user_geo_city: Annotated<String>,

/// Human readable subdivision name.
///
/// This is not an OTel convention (yet).
#[metastructure(field = "user.geo.subdivision")]
pub user_geo_subdivision: Annotated<String>,

/// Human readable region name or code.
///
/// This is not an OTel convention (yet).
#[metastructure(field = "user.geo.region")]
pub user_geo_region: Annotated<String>,

/// Unique user hash to correlate information for a user in anonymized form.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.hash")]
pub user_hash: Annotated<String>,

/// Unique identifier of the user.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.id")]
pub user_id: Annotated<String>,

/// Short name or login/username of the user.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.name")]
pub user_name: Annotated<String>,

/// Array of user roles at the time of the event.
///
/// <https://opentelemetry.io/docs/specs/semconv/attributes-registry/user/>
#[metastructure(field = "user.roles")]
pub user_roles: Annotated<Array<String>>,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to add all user-related fields from the OTel convention, even though we don't use them at the moment.


/// Replay ID
#[metastructure(field = "sentry.replay.id", legacy_alias = "replay_id")]
pub replay_id: Annotated<Value>,
Expand Down Expand Up @@ -410,7 +472,7 @@ pub struct SpanData {

/// The client's IP address.
#[metastructure(field = "client.address")]
pub client_address: Annotated<String>,
pub client_address: Annotated<IpAddr>,

/// The current route in the application.
///
Expand Down Expand Up @@ -463,6 +525,16 @@ impl Getter for SpanData {
"thread\\.name" => self.thread_name.as_str()?.into(),
"ui\\.component_name" => self.ui_component_name.value()?.into(),
"url\\.scheme" => self.url_scheme.value()?.into(),
"user" => self.user.value()?.into(),
"user\\.email" => self.user_email.as_str()?.into(),
"user\\.full_name" => self.user_full_name.as_str()?.into(),
"user\\.geo\\.city" => self.user_geo_city.as_str()?.into(),
"user\\.geo\\.country_code" => self.user_geo_country_code.as_str()?.into(),
"user\\.geo\\.region" => self.user_geo_region.as_str()?.into(),
"user\\.geo\\.subdivision" => self.user_geo_subdivision.as_str()?.into(),
"user\\.hash" => self.user_hash.as_str()?.into(),
"user\\.id" => self.user_id.as_str()?.into(),
"user\\.name" => self.user_name.as_str()?.into(),
"transaction" => self.segment_name.as_str()?.into(),
"release" => self.release.as_str()?.into(),
_ => {
Expand Down
2 changes: 1 addition & 1 deletion relay-server/src/services/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1866,7 +1866,7 @@ impl EnvelopeProcessorService {
if_processing!(self.inner.config, {
let global_config = self.inner.global_config.current();

span::process(state, &global_config);
span::process(state, &global_config, self.inner.geoip_lookup.as_ref());

self.enforce_quotas(state)?;
});
Expand Down
123 changes: 116 additions & 7 deletions relay-server/src/services/processor/span/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ use relay_event_normalization::{
TransactionsProcessor,
};
use relay_event_normalization::{
normalize_transaction_name, ClientHints, FromUserAgentInfo, ModelCosts, SchemaProcessor,
TimestampProcessor, TransactionNameRule, TrimmingProcessor,
normalize_transaction_name, ClientHints, FromUserAgentInfo, GeoIpLookup, ModelCosts,
SchemaProcessor, TimestampProcessor, TransactionNameRule, TrimmingProcessor,
};
use relay_event_schema::processor::{process_value, ProcessingState};
use relay_event_schema::protocol::{BrowserContext, Span, SpanData};
use relay_event_schema::protocol::{BrowserContext, IpAddr, Span, SpanData};
use relay_log::protocol::{Attachment, AttachmentType};
use relay_metrics::{MetricNamespace, UnixTimestamp};
use relay_pii::PiiProcessor;
Expand All @@ -42,7 +42,11 @@ use crate::utils::{sample, ItemAction, ManagedEnvelope};
#[error(transparent)]
struct ValidationError(#[from] anyhow::Error);

pub fn process(state: &mut ProcessEnvelopeState<SpanGroup>, global_config: &GlobalConfig) {
pub fn process(
state: &mut ProcessEnvelopeState<SpanGroup>,
global_config: &GlobalConfig,
geo_lookup: Option<&GeoIpLookup>,
) {
use relay_event_normalization::RemoveOtherProcessor;

// We only implement trace-based sampling rules for now, which can be computed
Expand All @@ -58,6 +62,8 @@ pub fn process(state: &mut ProcessEnvelopeState<SpanGroup>, global_config: &Glob
global_config,
state.project_state.config(),
&state.managed_envelope,
state.envelope().meta().client_addr().map(IpAddr::from),
geo_lookup,
);

let client_ip = state.managed_envelope.envelope().meta().client_addr();
Expand Down Expand Up @@ -352,6 +358,13 @@ struct NormalizeSpanConfig<'a> {
allowed_hosts: &'a [String],
/// Whether or not to scrub MongoDB span descriptions during normalization.
scrub_mongo_description: ScrubMongoDescription,
/// The IP address of the SDK that sent the span.
///
/// When `span.data.client_address` is set to `{{auto}}`, it is replaced with this IP address.
client_ip: Option<IpAddr>,
/// An initialized GeoIP lookup.
geo_lookup: Option<&'a GeoIpLookup>,
}

impl<'a> NormalizeSpanConfig<'a> {
Expand All @@ -360,6 +373,8 @@ impl<'a> NormalizeSpanConfig<'a> {
global_config: &'a GlobalConfig,
project_config: &'a ProjectConfig,
managed_envelope: &ManagedEnvelope,
client_ip: Option<IpAddr>,
geo_lookup: Option<&'a GeoIpLookup>,
) -> Self {
let aggregator_config = config.aggregator_config_for(MetricNamespace::Spans);

Expand Down Expand Up @@ -397,6 +412,8 @@ impl<'a> NormalizeSpanConfig<'a> {
} else {
ScrubMongoDescription::Disabled
},
client_ip,
geo_lookup,
}
}
}
Expand Down Expand Up @@ -450,13 +467,13 @@ fn normalize(
client_hints,
allowed_hosts,
scrub_mongo_description,
client_ip,
geo_lookup,
} = config;

set_segment_attributes(annotated_span);

// This follows the steps of `NormalizeProcessor::process_event`.
// Ideally, `NormalizeProcessor` would execute these steps generically, i.e. also when calling
// `process` on it.
// This follows the steps of `event::normalize`.

process_value(
annotated_span,
Expand All @@ -483,6 +500,29 @@ fn normalize(
return Err(ProcessingError::NoEventPayload);
};

if let Some(data) = span.data.value_mut() {
// Replace {{auto}} IPs:
if let Some(client_ip) = client_ip.as_ref() {
if let Some(ip) = data.client_address.value_mut().as_mut() {
if ip.is_auto() {
*ip = client_ip.clone();
}
}
}

// Derive geo ip:
if let Some(geoip_lookup) = geo_lookup {
if let Some(ip) = data.client_address.value() {
if let Ok(Some(geo)) = geoip_lookup.lookup(ip.as_str()) {
data.user_geo_city = geo.city;
data.user_geo_country_code = geo.country_code;
data.user_geo_region = geo.region;
data.user_geo_subdivision = geo.subdivision;
}
}
}
}

populate_ua_fields(span, user_agent.as_deref(), client_hints.as_deref());

if let Annotated(Some(ref mut measurement_values), ref mut meta) = span.measurements {
Expand Down Expand Up @@ -664,6 +704,7 @@ mod tests {
use std::sync::Arc;

use bytes::Bytes;
use once_cell::sync::Lazy;
use relay_base_schema::project::ProjectId;
use relay_event_schema::protocol::{
Context, ContextInner, SpanId, Timestamp, TraceContext, TraceId,
Expand Down Expand Up @@ -1024,4 +1065,72 @@ mod tests {
assert_eq!(get_value!(span.data.user_agent_original), None);
assert_eq!(get_value!(span.data.browser_name!), "Opera");
}

/// Lazily opened GeoIP database shared by the geo-derivation tests.
///
/// Panics on first access if the fixture database cannot be opened.
static GEO_LOOKUP: Lazy<GeoIpLookup> = Lazy::new(|| {
    let fixture = "../relay-event-normalization/tests/fixtures/GeoIP2-Enterprise-Test.mmdb";
    GeoIpLookup::open(fixture).unwrap()
});

/// Builds the [`NormalizeSpanConfig`] used by the geo-derivation tests.
///
/// The client IP is fixed to `2.125.160.216` and geo lookups go through [`GEO_LOOKUP`].
fn normalize_config() -> NormalizeSpanConfig<'static> {
    // Accepted timestamps start at one second after the epoch and run up to the maximum
    // representable UTC datetime.
    let range_start =
        UnixTimestamp::from_datetime(DateTime::<Utc>::from_timestamp_millis(1000).unwrap())
            .unwrap();
    let range_end = UnixTimestamp::from_datetime(DateTime::<Utc>::MAX_UTC).unwrap();

    NormalizeSpanConfig {
        received_at: DateTime::from_timestamp_nanos(0),
        timestamp_range: range_start..range_end,
        max_tag_value_size: 200,
        performance_score: None,
        measurements: None,
        ai_model_costs: None,
        max_name_and_unit_len: 200,
        tx_name_rules: &[],
        user_agent: None,
        client_hints: ClientHints::default(),
        allowed_hosts: &[],
        scrub_mongo_description: ScrubMongoDescription::Disabled,
        client_ip: Some(IpAddr(String::from("2.125.160.216"))),
        geo_lookup: Some(&GEO_LOOKUP),
    }
}

/// Geo info is derived when the span carries an explicit `client.address`.
#[test]
fn user_ip_from_client_ip_without_auto() {
    // Span payload with a concrete IP address in `data`.
    let payload = r#"{
"start_timestamp": 0,
"timestamp": 1,
"trace_id": "922dda2462ea4ac2b6a4b339bee90863",
"span_id": "922dda2462ea4ac2",
"data": {
"client.address": "2.125.160.216"
}
}"#;
    let mut span = Annotated::from_json(payload).unwrap();

    let outcome = normalize(&mut span, normalize_config());
    outcome.unwrap();

    assert_eq!(get_value!(span.data.user_geo_city!), "Boxford");
}

/// Geo info is still derived when the span's `client.address` is the `{{auto}}` placeholder.
#[test]
fn user_ip_from_client_ip_with_auto() {
    // Span payload that asks for the address to be filled in automatically.
    let payload = r#"{
"start_timestamp": 0,
"timestamp": 1,
"trace_id": "922dda2462ea4ac2b6a4b339bee90863",
"span_id": "922dda2462ea4ac2",
"data": {
"client.address": "{{auto}}"
}
}"#;
    let mut span = Annotated::from_json(payload).unwrap();

    let outcome = normalize(&mut span, normalize_config());
    outcome.unwrap();

    assert_eq!(get_value!(span.data.user_geo_city!), "Boxford");
}
}
Loading