Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(ds): Low cardinality outcome reason #3623

Merged
merged 9 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

**Internal**:

- Map outcome reasons for dynamic sampling to reduced set of values. ([#3623](https://github.com/getsentry/relay/pull/3623))

## 24.5.0

**Breaking Changes**:
Expand Down
98 changes: 96 additions & 2 deletions relay-server/src/services/outcome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! pipeline, outcomes may not be emitted if the item is accepted.

use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::error::Error;
use std::net::IpAddr;
use std::sync::Arc;
Expand All @@ -24,6 +24,7 @@ use relay_filter::FilterStatKey;
#[cfg(feature = "processing")]
use relay_kafka::{ClientError, KafkaClient, KafkaTopic};
use relay_quotas::{DataCategory, ReasonCode, Scoping};
use relay_sampling::config::RuleId;
use relay_sampling::evaluation::MatchedRuleIds;
use relay_statsd::metric;
use relay_system::{Addr, FromMessage, Interface, NoResponse, Service};
Expand Down Expand Up @@ -164,7 +165,7 @@ pub enum Outcome {
Filtered(FilterStatKey),

/// The event has been filtered by a Sampling Rule
FilteredSampling(MatchedRuleIds),
FilteredSampling(RuleCategories),

/// The event has been rate limited.
RateLimited(Option<ReasonCode>),
Expand Down Expand Up @@ -251,6 +252,79 @@ impl fmt::Display for Outcome {
}
}

/// A lower-cardinality version of [`RuleId`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RuleCategory {
BoostLowVolumeProjects,
BoostEnvironments,
IgnoreHealthChecks,
BoostKeyTransactions,
Recalibration,
BoostReplayId,
BoostLowVolumeTransactions,
BoostLatestReleases,
Custom,
Other,
}

impl RuleCategory {
fn as_str(&self) -> &'static str {
match self {
Self::BoostLowVolumeProjects => "1000",
Self::BoostEnvironments => "1001",
Self::IgnoreHealthChecks => "1002",
Self::BoostKeyTransactions => "1003",
Self::Recalibration => "1004",
Self::BoostReplayId => "1005",
Self::BoostLowVolumeTransactions => "1400",
Self::BoostLatestReleases => "1500",
Self::Custom => "3000",
Self::Other => "0",
}
}
}

impl From<RuleId> for RuleCategory {
fn from(value: RuleId) -> Self {
match value.0 {
1000 => Self::BoostLowVolumeProjects,
1001 => Self::BoostEnvironments,
1002 => Self::IgnoreHealthChecks,
1003 => Self::BoostKeyTransactions,
1004 => Self::Recalibration,
1005 => Self::BoostReplayId,
1400..=1499 => Self::BoostLowVolumeTransactions,
1500..=1599 => Self::BoostLatestReleases,
3000..=4999 => Self::Custom,
_ => Self::Other,
}
}
}

/// An ordered set of categories that can be used as outcome reason.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct RuleCategories(pub BTreeSet<RuleCategory>);

impl fmt::Display for RuleCategories {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (i, c) in self.0.iter().enumerate() {
if i > 0 {
write!(f, ",")?;
}
write!(f, "{}", c.as_str())?;
}
Ok(())
}
}

impl From<MatchedRuleIds> for RuleCategories {
fn from(value: MatchedRuleIds) -> Self {
RuleCategories(BTreeSet::from_iter(
value.0.into_iter().map(RuleCategory::from),
))
}
}

/// Reason for a discarded invalid event.
///
/// Used in `Outcome::Invalid`. Synchronize overlap with Sentry.
Expand Down Expand Up @@ -971,3 +1045,23 @@ impl Service for OutcomeProducerService {
});
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn rule_category_roundtrip() {
let input = "123,1004,1500,1403,1403,1404,1000";
let rule_ids = MatchedRuleIds::parse(input).unwrap();
let rule_categories = RuleCategories::from(rule_ids);

let serialized = rule_categories.to_string();
assert_eq!(&serialized, "1000,1004,1400,1500,0");

assert_eq!(
MatchedRuleIds::parse(&serialized).unwrap(),
MatchedRuleIds([1000, 1004, 1400, 1500, 0].map(RuleId).into())
);
}
}
2 changes: 1 addition & 1 deletion relay-server/src/services/processor/dynamic_sampling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ pub fn sample_envelope_items(
let unsampled_profiles_enabled = forward_unsampled_profiles(state, global_config);

let matched_rules = sampling_match.into_matched_rules();
let outcome = Outcome::FilteredSampling(matched_rules.clone());
let outcome = Outcome::FilteredSampling(matched_rules.into());
state.managed_envelope.retain_items(|item| {
if unsampled_profiles_enabled && item.ty() == &ItemType::Profile {
item.set_sampled(false);
Expand Down
46 changes: 39 additions & 7 deletions relay-server/src/services/processor/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use relay_sampling::evaluation::MatchedRuleIds;
use relay_system::Addr;

use crate::envelope::{ContentType, ItemType};
use crate::services::outcome::{Outcome, TrackOutcome};
use crate::services::outcome::{Outcome, RuleCategories, TrackOutcome};
use crate::services::processor::{ClientReportGroup, ProcessEnvelopeState, MINIMUM_CLOCK_DRIFT};
use crate::utils::ItemAction;

Expand Down Expand Up @@ -240,6 +240,7 @@ fn outcome_from_parts(field: ClientReportField, reason: &str) -> Result<Outcome,
match field {
ClientReportField::FilteredSampling => match reason.strip_prefix("Sampled:") {
Some(rule_ids) => MatchedRuleIds::parse(rule_ids)
.map(RuleCategories::from)
.map(Outcome::FilteredSampling)
.map_err(|_| ()),
None => Err(()),
Expand All @@ -261,11 +262,11 @@ mod tests {
use std::sync::Arc;

use relay_event_schema::protocol::EventId;
use relay_sampling::config::RuleId;
use relay_sampling::evaluation::ReservoirCounters;

use crate::envelope::{Envelope, Item};
use crate::extractors::RequestMeta;
use crate::services::outcome::RuleCategory;
use crate::services::processor::{ProcessEnvelope, ProcessingGroup};
use crate::services::project::ProjectState;
use crate::testutils::{self, create_test_processor};
Expand Down Expand Up @@ -559,15 +560,46 @@ mod tests {

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123,456"),
Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![
RuleId(123),
RuleId(456),
])))
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::Other].into()
)))
);

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"),
Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![RuleId(123)])))
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::Other].into()
)))
);

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"),
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::Other].into()
)))
);

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:1001"),
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::BoostEnvironments].into()
)))
);

assert_eq!(
outcome_from_parts(
ClientReportField::FilteredSampling,
"Sampled:1001,1456,1567,3333,4444"
),
Ok(Outcome::FilteredSampling(RuleCategories(
[
RuleCategory::BoostEnvironments,
RuleCategory::BoostLowVolumeTransactions,
RuleCategory::BoostLatestReleases,
RuleCategory::Custom
]
.into()
)))
);
}

Expand Down
4 changes: 2 additions & 2 deletions relay-server/src/services/processor/span/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use relay_spans::{otel_to_sentry_span, otel_trace::Span as OtelSpan};

use crate::envelope::{ContentType, Envelope, Item, ItemType};
use crate::metrics_extraction::generic::extract_metrics;
use crate::services::outcome::{DiscardReason, Outcome};
use crate::services::outcome::{DiscardReason, Outcome, RuleCategories};
use crate::services::processor::span::extract_transaction_span;
use crate::services::processor::{
dynamic_sampling, Addrs, ProcessEnvelope, ProcessEnvelopeState, ProcessingError,
Expand Down Expand Up @@ -53,7 +53,7 @@ pub fn process(
// once for all spans in the envelope.
let sampling_outcome = match dynamic_sampling::run(state, &config) {
SamplingResult::Match(sampling_match) if sampling_match.should_drop() => Some(
Outcome::FilteredSampling(sampling_match.into_matched_rules()),
Outcome::FilteredSampling(RuleCategories::from(sampling_match.into_matched_rules())),
),
_ => None,
};
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_dynamic_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def test_it_removes_events(mini_sentry, relay):
public_key = config["publicKeys"][0]["publicKey"]

# add a sampling rule to project config that removes all transactions (sample_rate=0)
rules = _add_sampling_config(config, sample_rate=0, rule_type="transaction")
_add_sampling_config(config, sample_rate=0, rule_type="transaction")

# create an envelope with a trace context that is initiated by this project (for simplicity)
envelope, trace_id, event_id = _create_transaction_envelope(public_key)
Expand All @@ -253,7 +253,7 @@ def test_it_removes_events(mini_sentry, relay):
assert outcomes is not None
outcome = outcomes["outcomes"][0]
assert outcome.get("outcome") == 1
assert outcome.get("reason") == f"Sampled:{rules[0]['id']}"
assert outcome.get("reason") == "Sampled:0"


def test_it_does_not_sample_error(mini_sentry, relay):
Expand Down Expand Up @@ -392,7 +392,7 @@ def test_sample_on_parametrized_root_transaction(mini_sentry, relay):
relay.send_envelope(project_id, envelope)

outcome = mini_sentry.captured_outcomes.get(timeout=2)
assert outcome["outcomes"][0]["reason"] == "Sampled:1"
assert outcome["outcomes"][0]["reason"] == "Sampled:0"


def test_it_keeps_events(mini_sentry, relay):
Expand Down
24 changes: 12 additions & 12 deletions tests/integration/test_outcome.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ def test_outcome_to_client_report(relay, mini_sentry):
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -853,7 +853,7 @@ def test_outcome_to_client_report(relay, mini_sentry):
"project_id": 42,
"key_id": 123,
"outcome": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"category": 9,
"quantity": 1,
}
Expand Down Expand Up @@ -960,7 +960,7 @@ def test_outcomes_aggregate_dynamic_sampling(relay, mini_sentry):
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1006,7 +1006,7 @@ def test_outcomes_aggregate_dynamic_sampling(relay, mini_sentry):
"project_id": 42,
"key_id": 123,
"outcome": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"category": 9,
"quantity": 2,
}
Expand Down Expand Up @@ -1069,7 +1069,7 @@ def test_graceful_shutdown(relay, mini_sentry):
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1120,7 +1120,7 @@ def test_graceful_shutdown(relay, mini_sentry):
"project_id": 42,
"key_id": 123,
"outcome": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"category": 9,
"quantity": 1,
}
Expand Down Expand Up @@ -1157,7 +1157,7 @@ def test_profile_outcomes(
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1242,7 +1242,7 @@ def make_envelope(transaction_name):
"outcome": 1,
"project_id": 42,
"quantity": 6, # len(b"foobar")
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
{
Expand All @@ -1252,7 +1252,7 @@ def make_envelope(transaction_name):
"outcome": 1, # Filtered
"project_id": 42,
"quantity": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
{
Expand All @@ -1262,7 +1262,7 @@ def make_envelope(transaction_name):
"outcome": 1, # Filtered
"project_id": 42,
"quantity": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
]
Expand Down Expand Up @@ -1751,7 +1751,7 @@ def test_span_outcomes(
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1827,7 +1827,7 @@ def make_envelope(transaction_name):
"outcome": 1, # Filtered
"project_id": 42,
"quantity": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
{
Expand Down
Loading
Loading