From 68b0f216bb9fa620fce9a345e5780375860e2b6a Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 13 Sep 2022 17:14:09 +0200 Subject: [PATCH 1/8] ref: Apply size limit in Pattern::new --- relay-general/src/pii/config.rs | 24 ++++++++++++++++++------ relay-general/src/pii/convert.rs | 8 +------- relay-general/src/pii/regexes.rs | 4 ++-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/relay-general/src/pii/config.rs b/relay-general/src/pii/config.rs index 82a67c618e..16045d2a46 100644 --- a/relay-general/src/pii/config.rs +++ b/relay-general/src/pii/config.rs @@ -11,7 +11,22 @@ use crate::processor::SelectorSpec; /// A regex pattern for text replacement. #[derive(Clone)] -pub struct Pattern(pub Regex); +pub struct Pattern(Regex); + +impl Pattern { + pub fn new(s: &str, case_insensitive: bool) -> Result { + let regex = RegexBuilder::new(s) + .size_limit(262_144) + .case_insensitive(case_insensitive) + .build()?; + + Ok(Self(regex)) + } + + pub fn regex(&self) -> &Regex { + &self.0 + } +} impl Deref for Pattern { type Target = Regex; @@ -42,11 +57,8 @@ impl Serialize for Pattern { impl<'de> Deserialize<'de> for Pattern { fn deserialize>(deserializer: D) -> Result { let raw = String::deserialize(deserializer)?; - let pattern = RegexBuilder::new(&raw) - .size_limit(262_144) - .build() - .map_err(Error::custom)?; - Ok(Pattern(pattern)) + let pattern = Pattern::new(&raw, false).map_err(Error::custom)?; + Ok(pattern) } } diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index 14a630c615..3825d197bd 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -1,7 +1,6 @@ use std::collections::BTreeMap; use once_cell::sync::Lazy; -use regex::RegexBuilder; use crate::pii::{ DataScrubbingConfig, Pattern, PiiConfig, RedactPairRule, Redaction, RuleSpec, RuleType, Vars, @@ -70,12 +69,7 @@ pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option smallvec![( kv, - &redact_pair.key_pattern.0, + redact_pair.key_pattern.regex(), ReplaceBehavior::replace_value() )], RuleType::Password => { @@ -69,7 +69,7 @@ pub fn get_regex_for_rule_type( None => ReplaceBehavior::replace_match(), }; - smallvec![(v, &r.pattern.0, replace_behavior)] + smallvec![(v, r.pattern.regex(), replace_behavior)] } RuleType::Imei => smallvec![(v, &*IMEI_REGEX, ReplaceBehavior::replace_match())], From df2a9f2f9d8d4e20f04720f20607bb4acda4ee5f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 14 Sep 2022 15:33:01 +0200 Subject: [PATCH 2/8] test: Write a test, see it fail --- relay-general/src/pii/convert.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index 3825d197bd..8196dd1d6b 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -278,6 +278,18 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv "###); } + #[test] + fn test_convert_sensitive_fields_too_large() { + let pii_config = to_pii_config(&DataScrubbingConfig { + sensitive_fields: vec!["fieldy_field"] + .repeat(999999) + .into_iter() + .map(|x| x.to_string()) + .collect(), + ..simple_enabled_config() + }); + } + #[test] fn test_convert_exclude_field() { let pii_config = to_pii_config(&DataScrubbingConfig { From ed619bcf78f2aea4d6b1607c85ad5a523e411602 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 14 Sep 2022 15:56:01 +0200 Subject: [PATCH 3/8] fix: Make to_pii_config fallible --- relay-general/src/pii/convert.rs | 17 +++++++++-------- relay-general/src/pii/legacy.rs | 8 ++++---- relay-server/src/actors/processor.rs | 3 ++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index 8196dd1d6b..00e4913e3d 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -25,7 +25,7 @@ static DATASCRUBBER_IGNORE: Lazy = Lazy::new(|| { .unwrap() }); -pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option { +pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Result, ()> { let mut custom_rules = BTreeMap::new(); let mut applied_rules = Vec::new(); let mut applications = BTreeMap::new(); @@ -69,7 +69,7 @@ pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Option Option Option Option { - let rv = to_pii_config_impl(datascrubbing_config); + let rv = to_pii_config_impl(datascrubbing_config).unwrap(); if let Some(ref config) = rv { let roundtrip: PiiConfig = serde_json::from_value(serde_json::to_value(config).unwrap()).unwrap(); @@ -280,14 +280,15 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv #[test] fn test_convert_sensitive_fields_too_large() { - let pii_config = to_pii_config(&DataScrubbingConfig { + let result = to_pii_config_impl(&DataScrubbingConfig { sensitive_fields: vec!["fieldy_field"] - .repeat(999999) + .repeat(999) .into_iter() .map(|x| x.to_string()) .collect(), ..simple_enabled_config() }); + assert!(result.is_err()); } #[test] diff --git a/relay-general/src/pii/legacy.rs b/relay-general/src/pii/legacy.rs index a20c068f75..0eb9f10639 100644 --- a/relay-general/src/pii/legacy.rs +++ b/relay-general/src/pii/legacy.rs @@ -36,7 +36,7 @@ pub struct DataScrubbingConfig { /// /// Cached because the conversion process is expensive. #[serde(skip)] - pub(super) pii_config: OnceCell>, + pub(super) pii_config: OnceCell, ()>>, } impl DataScrubbingConfig { @@ -48,7 +48,7 @@ impl DataScrubbingConfig { scrub_ip_addresses: false, sensitive_fields: vec![], scrub_defaults: false, - pii_config: OnceCell::with_value(None), + pii_config: OnceCell::with_value(Ok(None)), } } @@ -61,7 +61,7 @@ impl DataScrubbingConfig { /// /// This can be computationally expensive when called for the first time. The result is cached /// internally and reused on the second call. - pub fn pii_config(&self) -> Option<&'_ PiiConfig> { + pub fn pii_config(&self) -> Result<&'_ Option, &()> { self.pii_config .get_or_init(|| self.pii_config_uncached()) .as_ref() @@ -69,7 +69,7 @@ impl DataScrubbingConfig { /// Like [`pii_config`](Self::pii_config) but without internal caching. #[inline] - pub fn pii_config_uncached(&self) -> Option { + pub fn pii_config_uncached(&self) -> Result, ()> { convert::to_pii_config(self) } } diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index cf1f9807c3..d203d195db 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -1642,7 +1642,8 @@ impl EnvelopeProcessor { process_value(event, &mut processor, ProcessingState::root()) .map_err(ProcessingError::ProcessingFailed)?; } - if let Some(config) = config.datascrubbing_settings.pii_config() { + let pii_config = config.datascrubbing_settings.pii_config()?; + if let Some(config) = pii_config { let mut processor = PiiProcessor::new(config.compiled()); process_value(event, &mut processor, ProcessingState::root()) .map_err(ProcessingError::ProcessingFailed)?; From ab04eaa5111e261f0ad11b51c8e8f6a0c016fb99 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 15 Sep 2022 09:01:38 +0200 Subject: [PATCH 4/8] fix: Remaining function calls --- relay-cabi/src/processing.rs | 6 ++++-- relay-general/benches/benchmarks.rs | 2 +- relay-general/src/pii/config.rs | 12 ++++++++++-- relay-general/src/pii/convert.rs | 8 ++++++-- relay-general/src/pii/legacy.rs | 8 +++++--- relay-general/src/pii/mod.rs | 4 ++-- relay-server/src/actors/processor.rs | 14 ++++++++++---- 7 files changed, 38 insertions(+), 16 deletions(-) diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index 77b9e777e4..8706fa3800 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -143,8 +143,10 @@ pub unsafe extern "C" fn relay_validate_pii_config(value: *const RelayStr) -> Re pub unsafe extern "C" fn relay_convert_datascrubbing_config(config: *const RelayStr) -> RelayStr { let config: DataScrubbingConfig = serde_json::from_str((*config).as_str())?; match config.pii_config() { - Some(config) => RelayStr::from_string(config.to_json()?), - None => RelayStr::new("{}"), + Ok(Some(config)) => RelayStr::from_string(config.to_json()?), + Ok(None) => RelayStr::new("{}"), + // NOTE: Callers of this function must be able to handle this error. + Err(e) => RelayStr::from_string(e.to_string()), } } diff --git a/relay-general/benches/benchmarks.rs b/relay-general/benches/benchmarks.rs index 1bd3af9155..d479d9928b 100644 --- a/relay-general/benches/benchmarks.rs +++ b/relay-general/benches/benchmarks.rs @@ -140,7 +140,7 @@ fn bench_pii_stripping(c: &mut Criterion) { |b, datascrubbing_config| b.iter(|| datascrubbing_config.pii_config_uncached()), ); - let pii_config = datascrubbing_config.pii_config_uncached().unwrap(); + let pii_config = datascrubbing_config.pii_config_uncached().unwrap().unwrap(); group.bench_with_input( BenchmarkId::new("compile_pii_config", config_name), diff --git a/relay-general/src/pii/config.rs b/relay-general/src/pii/config.rs index 16045d2a46..40df1ef056 100644 --- a/relay-general/src/pii/config.rs +++ b/relay-general/src/pii/config.rs @@ -8,17 +8,25 @@ use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; use crate::pii::{CompiledPiiConfig, Redaction}; use crate::processor::SelectorSpec; +use failure::Fail; + +#[derive(Clone, Debug, Fail)] +pub enum PiiConfigError { + #[fail(display = "could not parse regex")] + RegexError(#[cause] regex::Error), +} /// A regex pattern for text replacement. #[derive(Clone)] pub struct Pattern(Regex); impl Pattern { - pub fn new(s: &str, case_insensitive: bool) -> Result { + pub fn new(s: &str, case_insensitive: bool) -> Result { let regex = RegexBuilder::new(s) .size_limit(262_144) .case_insensitive(case_insensitive) - .build()?; + .build() + .map_err(PiiConfigError::RegexError)?; Ok(Self(regex)) } diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index 00e4913e3d..c8c4248c3b 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -7,6 +7,8 @@ use crate::pii::{ }; use crate::processor::{SelectorPathItem, SelectorSpec, ValueType}; +use crate::pii::config::PiiConfigError; + // XXX: Move to @ip rule for better IP address scrubbing. Right now we just try to keep // compatibility with Python. static KNOWN_IP_FIELDS: Lazy = Lazy::new(|| { @@ -25,7 +27,9 @@ static DATASCRUBBER_IGNORE: Lazy = Lazy::new(|| { .unwrap() }); -pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Result, ()> { +pub fn to_pii_config( + datascrubbing_config: &DataScrubbingConfig, +) -> Result, PiiConfigError> { let mut custom_rules = BTreeMap::new(); let mut applied_rules = Vec::new(); let mut applications = BTreeMap::new(); @@ -69,7 +73,7 @@ pub fn to_pii_config(datascrubbing_config: &DataScrubbingConfig) -> Result bool { @@ -36,7 +38,7 @@ pub struct DataScrubbingConfig { /// /// Cached because the conversion process is expensive. #[serde(skip)] - pub(super) pii_config: OnceCell, ()>>, + pub(super) pii_config: OnceCell, PiiConfigError>>, } impl DataScrubbingConfig { @@ -61,7 +63,7 @@ impl DataScrubbingConfig { /// /// This can be computationally expensive when called for the first time. The result is cached /// internally and reused on the second call. - pub fn pii_config(&self) -> Result<&'_ Option, &()> { + pub fn pii_config(&self) -> Result<&'_ Option, &PiiConfigError> { self.pii_config .get_or_init(|| self.pii_config_uncached()) .as_ref() @@ -69,7 +71,7 @@ impl DataScrubbingConfig { /// Like [`pii_config`](Self::pii_config) but without internal caching. #[inline] - pub fn pii_config_uncached(&self) -> Result, ()> { + pub fn pii_config_uncached(&self) -> Result, PiiConfigError> { convert::to_pii_config(self) } } diff --git a/relay-general/src/pii/mod.rs b/relay-general/src/pii/mod.rs index 9295acb83c..71cc7ad326 100644 --- a/relay-general/src/pii/mod.rs +++ b/relay-general/src/pii/mod.rs @@ -16,8 +16,8 @@ mod utils; pub use self::attachments::{PiiAttachmentsProcessor, ScrubEncodings}; pub use self::compiledconfig::CompiledPiiConfig; pub use self::config::{ - AliasRule, MultipleRule, Pattern, PatternRule, PiiConfig, RedactPairRule, RuleSpec, RuleType, - Vars, + AliasRule, MultipleRule, Pattern, PatternRule, PiiConfig, PiiConfigError, RedactPairRule, + RuleSpec, RuleType, Vars, }; pub use self::generate_selectors::selector_suggestions_from_value; pub use self::legacy::DataScrubbingConfig; diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index d203d195db..600f1b8d98 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -13,12 +13,11 @@ use failure::Fail; use flate2::write::{GzEncoder, ZlibEncoder}; use flate2::Compression; use once_cell::sync::OnceCell; -use serde_json::Value as SerdeValue; - use relay_auth::RelayVersion; use relay_common::{clone, ProjectId, ProjectKey, UnixTimestamp}; use relay_config::{Config, HttpEncoding}; use relay_filter::FilterStatKey; +use relay_general::pii::PiiConfigError; use relay_general::pii::{PiiAttachmentsProcessor, PiiProcessor}; use relay_general::processor::{process_value, ProcessingState}; use relay_general::protocol::{ @@ -34,6 +33,7 @@ use relay_quotas::{DataCategory, RateLimits, ReasonCode}; use relay_redis::RedisPool; use relay_sampling::RuleId; use relay_statsd::metric; +use serde_json::Value as SerdeValue; use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; @@ -113,6 +113,9 @@ pub enum ProcessingError { #[fail(display = "event dropped by sampling rule {}", _0)] Sampled(RuleId), + + #[fail(display = "invalid pii config")] + PiiConfigError(PiiConfigError), } impl ProcessingError { @@ -139,7 +142,7 @@ impl ProcessingError { Self::InvalidUnrealReport(_) => Some(Outcome::Invalid(DiscardReason::ProcessUnreal)), // Internal errors - Self::SerializeFailed(_) | Self::ProcessingFailed(_) => { + Self::SerializeFailed(_) | Self::ProcessingFailed(_) | Self::PiiConfigError(_) => { Some(Outcome::Invalid(DiscardReason::Internal)) } #[cfg(feature = "processing")] @@ -1642,7 +1645,10 @@ impl EnvelopeProcessor { process_value(event, &mut processor, ProcessingState::root()) .map_err(ProcessingError::ProcessingFailed)?; } - let pii_config = config.datascrubbing_settings.pii_config()?; + let pii_config = config + .datascrubbing_settings + .pii_config() + .map_err(|e| ProcessingError::PiiConfigError(e.clone()))?; if let Some(config) = pii_config { let mut processor = PiiProcessor::new(config.compiled()); process_value(event, &mut processor, ProcessingState::root()) From 64b0abd547d5d40051f48d37399d7b6a9109b8c0 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 15 Sep 2022 09:40:31 +0200 Subject: [PATCH 5/8] changelog and update test --- CHANGELOG.md | 1 + py/CHANGELOG.md | 1 + relay-general/src/pii/convert.rs | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecf706fff3..7d023f4a21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Process required stacktraces to fix filtering events originating from browser extensions. ([#1423](https://github.com/getsentry/relay/pull/1423)) - Fix error message filtering when formatting the message of logentry. ([#1442](https://github.com/getsentry/relay/pull/1442)) - Use the different configuration for billing outcomes when specified. ([#1461](https://github.com/getsentry/relay/pull/1461)) +- Fix panic in datascrubbing when number of sensitive fields was too large. ([#1474](https://github.com/getsentry/relay/pull/1474)) **Internal**: diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index b0e2791871..b66b111b0b 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -4,6 +4,7 @@ - Add `transaction_info` to event payloads, including the transaction's source and internal original transaction name. ([#1330](https://github.com/getsentry/relay/pull/1330)) - Add user-agent parsing to replays processor. ([#1420](https://github.com/getsentry/relay/pull/1420)) +- convert_datascrubbing_config will now return an error string when conversion fails. ([#1474](https://github.com/getsentry/relay/pull/1474)) ## 0.8.13 diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index c8c4248c3b..8586461863 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -285,8 +285,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv #[test] fn test_convert_sensitive_fields_too_large() { let result = to_pii_config_impl(&DataScrubbingConfig { - sensitive_fields: vec!["fieldy_field"] - .repeat(999) + sensitive_fields: vec!["1"] + .repeat(4085) // lowest number that will fail .into_iter() .map(|x| x.to_string()) .collect(), From 434c99c1b85a637997a3e04a6e8d24e0ddc06d80 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 15 Sep 2022 11:35:29 +0200 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Jan Michael Auer Co-authored-by: Iker Barriocanal <32816711+iker-barriocanal@users.noreply.github.com> --- CHANGELOG.md | 1 - py/CHANGELOG.md | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5823fb8156..bbd329280d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,6 @@ - Process required stacktraces to fix filtering events originating from browser extensions. ([#1423](https://github.com/getsentry/relay/pull/1423)) - Fix error message filtering when formatting the message of logentry. ([#1442](https://github.com/getsentry/relay/pull/1442)) - Loosen type requirements for the `user.id` field in Replays. ([#1443](https://github.com/getsentry/relay/pull/1443)) -- Use the different configuration for billing outcomes when specified. ([#1461](https://github.com/getsentry/relay/pull/1461)) - Fix panic in datascrubbing when number of sensitive fields was too large. ([#1474](https://github.com/getsentry/relay/pull/1474)) diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index b66b111b0b..30d74617a7 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -4,7 +4,7 @@ - Add `transaction_info` to event payloads, including the transaction's source and internal original transaction name. ([#1330](https://github.com/getsentry/relay/pull/1330)) - Add user-agent parsing to replays processor. ([#1420](https://github.com/getsentry/relay/pull/1420)) -- convert_datascrubbing_config will now return an error string when conversion fails. ([#1474](https://github.com/getsentry/relay/pull/1474)) +- `convert_datascrubbing_config` will now return an error string when conversion fails on big regexes. ([#1474](https://github.com/getsentry/relay/pull/1474)) ## 0.8.13 From 8dbbc85ae5571a80010408d567d23bfa74c5efd2 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 15 Sep 2022 11:54:43 +0200 Subject: [PATCH 7/8] PR feedback --- relay-general/src/pii/config.rs | 11 ++++++----- relay-general/src/pii/convert.rs | 7 +++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/relay-general/src/pii/config.rs b/relay-general/src/pii/config.rs index 40df1ef056..d8262bf4a7 100644 --- a/relay-general/src/pii/config.rs +++ b/relay-general/src/pii/config.rs @@ -2,13 +2,15 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fmt; use std::ops::Deref; +use failure::Fail; use once_cell::sync::OnceCell; use regex::{Regex, RegexBuilder}; use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; use crate::pii::{CompiledPiiConfig, Redaction}; use crate::processor::SelectorSpec; -use failure::Fail; + +const COMPILED_PATTERN_MAX_SIZE: usize = 262_144; #[derive(Clone, Debug, Fail)] pub enum PiiConfigError { @@ -21,9 +23,9 @@ pub enum PiiConfigError { pub struct Pattern(Regex); impl Pattern { - pub fn new(s: &str, case_insensitive: bool) -> Result { + pub fn parse(s: &str, case_insensitive: bool) -> Result { let regex = RegexBuilder::new(s) - .size_limit(262_144) + .size_limit(COMPILED_PATTERN_MAX_SIZE) .case_insensitive(case_insensitive) .build() .map_err(PiiConfigError::RegexError)?; @@ -65,8 +67,7 @@ impl Serialize for Pattern { impl<'de> Deserialize<'de> for Pattern { fn deserialize>(deserializer: D) -> Result { let raw = String::deserialize(deserializer)?; - let pattern = Pattern::new(&raw, false).map_err(Error::custom)?; - Ok(pattern) + Pattern::parse(&raw, false).map_err(Error::custom) } } diff --git a/relay-general/src/pii/convert.rs b/relay-general/src/pii/convert.rs index 8586461863..fab46fbfd4 100644 --- a/relay-general/src/pii/convert.rs +++ b/relay-general/src/pii/convert.rs @@ -3,12 +3,11 @@ use std::collections::BTreeMap; use once_cell::sync::Lazy; use crate::pii::{ - DataScrubbingConfig, Pattern, PiiConfig, RedactPairRule, Redaction, RuleSpec, RuleType, Vars, + DataScrubbingConfig, Pattern, PiiConfig, PiiConfigError, RedactPairRule, Redaction, RuleSpec, + RuleType, Vars, }; use crate::processor::{SelectorPathItem, SelectorSpec, ValueType}; -use crate::pii::config::PiiConfigError; - // XXX: Move to @ip rule for better IP address scrubbing. Right now we just try to keep // compatibility with Python. static KNOWN_IP_FIELDS: Lazy = Lazy::new(|| { @@ -73,7 +72,7 @@ pub fn to_pii_config( "strip-fields".to_owned(), RuleSpec { ty: RuleType::RedactPair(RedactPairRule { - key_pattern: Pattern::new(&key_pattern, true)?, + key_pattern: Pattern::parse(&key_pattern, true)?, }), redaction: Redaction::Replace("[Filtered]".to_owned().into()), }, From 69a7dbef96cc61b5255ec51c073afe7bfe5a6e8f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 16 Sep 2022 09:49:57 +0200 Subject: [PATCH 8/8] ref: Separate discard reason for pii conversion failure, doc comment --- relay-general/src/pii/legacy.rs | 4 ++++ relay-server/src/actors/outcome.rs | 5 +++++ relay-server/src/actors/processor.rs | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/relay-general/src/pii/legacy.rs b/relay-general/src/pii/legacy.rs index 18dd4c73f3..d3119867d7 100644 --- a/relay-general/src/pii/legacy.rs +++ b/relay-general/src/pii/legacy.rs @@ -37,6 +37,10 @@ pub struct DataScrubbingConfig { /// PII config derived from datascrubbing settings. /// /// Cached because the conversion process is expensive. + /// + /// NOTE: We discarded the idea of making the conversion to PiiConfig part of deserialization, + /// because we want the conversion to run on the processing sync arbiter, so that it does not + /// slow down or even crash other parts of the system. #[serde(skip)] pub(super) pii_config: OnceCell, PiiConfigError>>, } diff --git a/relay-server/src/actors/outcome.rs b/relay-server/src/actors/outcome.rs index 0813054f3b..d04217cf82 100644 --- a/relay-server/src/actors/outcome.rs +++ b/relay-server/src/actors/outcome.rs @@ -310,6 +310,10 @@ pub enum DiscardReason { /// (Relay) A project state returned by the upstream could not be parsed. ProjectState, + /// (Relay) A project state returned by the upstream contained datascrubbing settings + /// that could not be converted to PII config. + ProjectStatePii, + /// (Relay) An envelope was submitted with two items that need to be unique. DuplicateItem, @@ -368,6 +372,7 @@ impl DiscardReason { DiscardReason::InvalidEnvelope => "invalid_envelope", DiscardReason::InvalidCompression => "invalid_compression", DiscardReason::ProjectState => "project_state", + DiscardReason::ProjectStatePii => "project_state_pii", DiscardReason::DuplicateItem => "duplicate_item", DiscardReason::NoEventPayload => "no_event_payload", DiscardReason::Internal => "internal", diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 2bb5e913fa..6ad02657f5 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -142,11 +142,12 @@ impl ProcessingError { Self::InvalidUnrealReport(_) => Some(Outcome::Invalid(DiscardReason::ProcessUnreal)), // Internal errors - Self::SerializeFailed(_) | Self::ProcessingFailed(_) | Self::PiiConfigError(_) => { + Self::SerializeFailed(_) | Self::ProcessingFailed(_) => { Some(Outcome::Invalid(DiscardReason::Internal)) } #[cfg(feature = "processing")] Self::QuotasFailed(_) => Some(Outcome::Invalid(DiscardReason::Internal)), + Self::PiiConfigError(_) => Some(Outcome::Invalid(DiscardReason::ProjectStatePii)), // These outcomes are emitted at the source. Self::MissingProjectId => None,