diff --git a/CHANGELOG.md b/CHANGELOG.md index 5379f2cdb9..9b7c5062e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ - Process required stacktraces to fix filtering events originating from browser extensions. ([#1423](https://github.com/getsentry/relay/pull/1423)) - Fix error message filtering when formatting the message of logentry. ([#1442](https://github.com/getsentry/relay/pull/1442)) - Loosen type requirements for the `user.id` field in Replays. ([#1443](https://github.com/getsentry/relay/pull/1443)) +- Fix panic in datascrubbing when number of sensitive fields was too large. ([#1474](https://github.com/getsentry/relay/pull/1474)) + **Internal**: diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index b0e2791871..30d74617a7 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -4,6 +4,7 @@ - Add `transaction_info` to event payloads, including the transaction's source and internal original transaction name. ([#1330](https://github.com/getsentry/relay/pull/1330)) - Add user-agent parsing to replays processor. ([#1420](https://github.com/getsentry/relay/pull/1420)) +- `convert_datascrubbing_config` will now return an error string when conversion fails on big regexes. ([#1474](https://github.com/getsentry/relay/pull/1474)) ## 0.8.13 diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index 77b9e777e4..8706fa3800 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -143,8 +143,10 @@ pub unsafe extern "C" fn relay_validate_pii_config(value: *const RelayStr) -> Re pub unsafe extern "C" fn relay_convert_datascrubbing_config(config: *const RelayStr) -> RelayStr { let config: DataScrubbingConfig = serde_json::from_str((*config).as_str())?; match config.pii_config() { - Some(config) => RelayStr::from_string(config.to_json()?), - None => RelayStr::new("{}"), + Ok(Some(config)) => RelayStr::from_string(config.to_json()?), + Ok(None) => RelayStr::new("{}"), + // NOTE: Callers of this function must be able to handle this error. + Err(e) => RelayStr::from_string(e.to_string()), } } diff --git a/relay-general/benches/benchmarks.rs b/relay-general/benches/benchmarks.rs index 1bd3af9155..d479d9928b 100644 --- a/relay-general/benches/benchmarks.rs +++ b/relay-general/benches/benchmarks.rs @@ -140,7 +140,7 @@ fn bench_pii_stripping(c: &mut Criterion) { |b, datascrubbing_config| b.iter(|| datascrubbing_config.pii_config_uncached()), ); - let pii_config = datascrubbing_config.pii_config_uncached().unwrap(); + let pii_config = datascrubbing_config.pii_config_uncached().unwrap().unwrap(); group.bench_with_input( BenchmarkId::new("compile_pii_config", config_name), diff --git a/relay-general/src/pii/config.rs b/relay-general/src/pii/config.rs index 82a67c618e..d8262bf4a7 100644 --- a/relay-general/src/pii/config.rs +++ b/relay-general/src/pii/config.rs @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fmt; use std::ops::Deref; +use failure::Fail; use once_cell::sync::OnceCell; use regex::{Regex, RegexBuilder}; use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; @@ -9,9 +10,33 @@ use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; use crate::pii::{CompiledPiiConfig, Redaction}; use crate::processor::SelectorSpec; +const COMPILED_PATTERN_MAX_SIZE: usize = 262_144; + +#[derive(Clone, Debug, Fail)] +pub enum PiiConfigError { + #[fail(display = "could not parse regex")] + RegexError(#[cause] regex::Error), +} + /// A regex pattern for text replacement. #[derive(Clone)] -pub struct Pattern(pub Regex); +pub struct Pattern(Regex); + +impl Pattern { + pub fn parse(s: &str, case_insensitive: bool) -> Result { + let regex = RegexBuilder::new(s) + .size_limit(COMPILED_PATTERN_MAX_SIZE) + .case_insensitive(case_insensitive) + .build() + .map_err(PiiConfigError::RegexError)?; + + Ok(Self(regex)) + } + + pub fn regex(&self) -> &Regex { + &self.0 + } +} impl Deref for Pattern { type Target = Regex; @@ -42,11 +67,7 @@ impl Serialize for Pattern { impl<'de> Deserialize<'de> for Pattern { fn deserialize>(deserializer: D) -> Result { let raw = String::deserialize(deserializer)?; - let pattern = RegexBuilder::new(&raw) - .size_limit(262_144) - .build() - .map_err(Error::custom)?; - Ok(Pattern(pattern)) + Pattern::parse(&raw, false).map_err(Error::custom) } } diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index 14a630c615..fab46fbfd4 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -1,10 +1,10 @@ use std::collections::BTreeMap; use once_cell::sync::Lazy; -use regex::RegexBuilder; use crate::pii::{ - DataScrubbingConfig, Pattern, PiiConfig, RedactPairRule, Redaction, RuleSpec, RuleType, Vars, + DataScrubbingConfig, Pattern, PiiConfig, PiiConfigError, RedactPairRule, Redaction, RuleSpec, + RuleType, Vars, }; use crate::processor::{SelectorPathItem, SelectorSpec, ValueType}; @@ -26,7 +26,9 @@ static DATASCRUBBER_IGNORE: Lazy = Lazy::new(|| { .unwrap() }); -pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option { +pub fn to_pii_config( + datascrubbing_config: &DataScrubbingConfig, +) -> Result, PiiConfigError> { let mut custom_rules = BTreeMap::new(); let mut applied_rules = Vec::new(); let mut applications = BTreeMap::new(); @@ -70,12 +72,7 @@ pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option Option Option Option { - let rv = to_pii_config_impl(datascrubbing_config); + let rv = to_pii_config_impl(datascrubbing_config).unwrap(); if let Some(ref config) = rv { let roundtrip: PiiConfig = serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap(); @@ -284,6 +281,19 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv "###); } + #[test] + fn test_convert_sensitive_fields_too_large() { + let result = to_pii_config_impl(&DataScrubbingConfig { + sensitive_fields: vec!["1"] + .repeat(4085) // lowest number that will fail + .into_iter() + .map(|x| x.to_string()) + .collect(), + ..simple_enabled_config() + }); + assert!(result.is_err()); + } + #[test] fn test_convert_exclude_field() { let pii_config = to_pii_config(&DataScrubbingConfig { diff --git a/relay-general/src/pii/legacy.rs b/relay-general/src/pii/legacy.rs index a20c068f75..d3119867d7 100644 --- a/relay-general/src/pii/legacy.rs +++ b/relay-general/src/pii/legacy.rs @@ -8,6 +8,8 @@ use serde::{Deserialize, Serialize}; use crate::pii::{convert, PiiConfig}; +use super::config::PiiConfigError; + /// Helper method to check whether a flag is false. #[allow(clippy::trivially_copy_pass_by_ref)] fn is_flag_default(flag: &bool) -> bool { @@ -35,8 +37,12 @@ pub struct DataScrubbingConfig { /// PII config derived from datascrubbing settings. /// /// Cached because the conversion process is expensive. + /// + /// NOTE: We discarded the idea of making the conversion to PiiConfig part of deserialization, + /// because we want the conversion to run on the processing sync arbiter, so that it does not + /// slow down or even crash other parts of the system. #[serde(skip)] - pub(super) pii_config: OnceCell>, + pub(super) pii_config: OnceCell, PiiConfigError>>, } impl DataScrubbingConfig { @@ -48,7 +54,7 @@ impl DataScrubbingConfig { scrub_ip_addresses: false, sensitive_fields: vec![], scrub_defaults: false, - pii_config: OnceCell::with_value(None), + pii_config: OnceCell::with_value(Ok(None)), } } @@ -61,7 +67,7 @@ impl DataScrubbingConfig { /// /// This can be computationally expensive when called for the first time. The result is cached /// internally and reused on the second call. - pub fn pii_config(&self) -> Option<&'_ PiiConfig> { + pub fn pii_config(&self) -> Result<&'_ Option, &PiiConfigError> { self.pii_config .get_or_init(|| self.pii_config_uncached()) .as_ref() @@ -69,7 +75,7 @@ impl DataScrubbingConfig { /// Like [`pii_config`](Self::pii_config) but without internal caching. #[inline] - pub fn pii_config_uncached(&self) -> Option { + pub fn pii_config_uncached(&self) -> Result, PiiConfigError> { convert::to_pii_config(self) } } diff --git a/relay-general/src/pii/mod.rs b/relay-general/src/pii/mod.rs index 9295acb83c..71cc7ad326 100644 --- a/relay-general/src/pii/mod.rs +++ b/relay-general/src/pii/mod.rs @@ -16,8 +16,8 @@ mod utils; pub use self::attachments::{PiiAttachmentsProcessor, ScrubEncodings}; pub use self::compiledconfig::CompiledPiiConfig; pub use self::config::{ - AliasRule, MultipleRule, Pattern, PatternRule, PiiConfig, RedactPairRule, RuleSpec, RuleType, - Vars, + AliasRule, MultipleRule, Pattern, PatternRule, PiiConfig, PiiConfigError, RedactPairRule, + RuleSpec, RuleType, Vars, }; pub use self::generate_selectors::selector_suggestions_from_value; pub use self::legacy::DataScrubbingConfig; diff --git a/relay-general/src/pii/regexes.rs b/relay-general/src/pii/regexes.rs index f4fa011de9..748716ed5f 100644 --- a/relay-general/src/pii/regexes.rs +++ b/relay-general/src/pii/regexes.rs @@ -54,7 +54,7 @@ pub fn get_regex_for_rule_type( match ty { RuleType::RedactPair(ref redact_pair) => smallvec![( kv, - &redact_pair.key_pattern.0, + redact_pair.key_pattern.regex(), ReplaceBehavior::replace_value() )], RuleType::Password => { @@ -69,7 +69,7 @@ pub fn get_regex_for_rule_type( None => ReplaceBehavior::replace_match(), }; - smallvec![(v, &r.pattern.0, replace_behavior)] + smallvec![(v, r.pattern.regex(), replace_behavior)] } RuleType::Imei => smallvec![(v, &*IMEI_REGEX, ReplaceBehavior::replace_match())], diff --git a/relay-server/src/actors/outcome.rs b/relay-server/src/actors/outcome.rs index 92fa243bee..3a2dfc65a1 100644 --- a/relay-server/src/actors/outcome.rs +++ b/relay-server/src/actors/outcome.rs @@ -316,6 +316,10 @@ pub enum DiscardReason { /// (Relay) A project state returned by the upstream could not be parsed. ProjectState, + /// (Relay) A project state returned by the upstream contained datascrubbing settings + /// that could not be converted to PII config. + ProjectStatePii, + /// (Relay) An envelope was submitted with two items that need to be unique. DuplicateItem, @@ -374,6 +378,7 @@ impl DiscardReason { DiscardReason::InvalidEnvelope => "invalid_envelope", DiscardReason::InvalidCompression => "invalid_compression", DiscardReason::ProjectState => "project_state", + DiscardReason::ProjectStatePii => "project_state_pii", DiscardReason::DuplicateItem => "duplicate_item", DiscardReason::NoEventPayload => "no_event_payload", DiscardReason::Internal => "internal", diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 46fcff6aaa..6a8a5dbdf6 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -20,6 +20,7 @@ use relay_auth::RelayVersion; use relay_common::{ProjectId, ProjectKey, UnixTimestamp}; use relay_config::{Config, HttpEncoding}; use relay_filter::FilterStatKey; +use relay_general::pii::PiiConfigError; use relay_general::pii::{PiiAttachmentsProcessor, PiiProcessor}; use relay_general::processor::{process_value, ProcessingState}; use relay_general::protocol::{ @@ -114,6 +115,9 @@ pub enum ProcessingError { #[fail(display = "event dropped by sampling rule {}", _0)] Sampled(RuleId), + + #[fail(display = "invalid pii config")] + PiiConfigError(PiiConfigError), } impl ProcessingError { @@ -145,6 +149,7 @@ impl ProcessingError { } #[cfg(feature = "processing")] Self::QuotasFailed(_) => Some(Outcome::Invalid(DiscardReason::Internal)), + Self::PiiConfigError(_) => Some(Outcome::Invalid(DiscardReason::ProjectStatePii)), // These outcomes are emitted at the source. Self::MissingProjectId => None, @@ -1811,7 +1816,11 @@ impl EnvelopeProcessorService { process_value(event, &mut processor, ProcessingState::root()) .map_err(ProcessingError::ProcessingFailed)?; } - if let Some(config) = config.datascrubbing_settings.pii_config() { + let pii_config = config + .datascrubbing_settings + .pii_config() + .map_err(|e| ProcessingError::PiiConfigError(e.clone()))?; + if let Some(config) = pii_config { let mut processor = PiiProcessor::new(config.compiled()); process_value(event, &mut processor, ProcessingState::root()) .map_err(ProcessingError::ProcessingFailed)?;