
fix(metrics): Track memory footprint more accurately [INGEST-1132] #1288

Merged · 10 commits · Jun 7, 2022
CHANGELOG.md (2 changes: 1 addition & 1 deletion)

@@ -21,7 +21,7 @@
- Add support for profile outcomes. ([#1272](https://github.com/getsentry/relay/pull/1272))
- Avoid potential panics when scrubbing minidumps. ([#1282](https://github.com/getsentry/relay/pull/1282))
- Fix typescript profile validation. ([#1283](https://github.com/getsentry/relay/pull/1283))
-- Track memory footprint of metrics buckets. ([#1284](https://github.com/getsentry/relay/pull/1284))
+- Track memory footprint of metrics buckets. ([#1284](https://github.com/getsentry/relay/pull/1284), [#1288](https://github.com/getsentry/relay/pull/1288)).

## 22.5.0

relay-metrics/src/aggregation.rs (236 changes: 182 additions & 54 deletions)

@@ -1,6 +1,7 @@
use std::collections::{btree_map, hash_map::Entry, BTreeMap, BTreeSet, HashMap};
use std::fmt;
+use std::mem;
use std::time::{Duration, Instant};

use actix::prelude::*;
@@ -521,19 +522,32 @@ impl BucketValue {
}
}

-    /// Estimates the number of bytes needed to encode the bucket.
-    /// Note that this does not necessarily match the exact memory footprint of the bucket,
+    /// Estimates the number of bytes needed to encode the bucket value.
+    /// Note that this does not necessarily match the exact memory footprint of the value,
/// because datastructures might have a memory overhead.
///
/// This is very similar to [`BucketValue::relative_size`], which can possibly be removed.
pub fn cost(&self) -> usize {
-        match self {
-            Self::Counter(_) => 8,
-            Self::Set(s) => 4 * s.len(),
-            Self::Gauge(_) => 5 * 8,
-            // Distribution values are stored as maps of (f64, u32) pairs
-            Self::Distribution(m) => 12 * m.internal_size(),
-        }
+        // We use private type aliases and constants here to make sure that a change in the implementation
+        // does not change the cost model without being noticed.
+        // Note that when the actual data type of these bucket values changes, we have to update
+        // the cost model and potentially increase the limits that are applied to it.
+        type SetType = u32;
+        /// [`BucketValue`] struct currently fits into 48 bytes:
+        const SELF_SIZE: usize = 48;
+        // Distribution values are stored as maps of (f64, u32) pairs:
+        const DIST_SIZE: usize = mem::size_of::<f64>() + mem::size_of::<u32>();
+
+        // Beside the size of [`BucketValue`], we also need to account for the cost of values
+        // allocated dynamically.
+        let allocated_cost = match self {
+            Self::Counter(_) => 0,
+            Self::Set(s) => mem::size_of::<SetType>() * s.len(),
+            Self::Gauge(_) => 0,
+            Self::Distribution(m) => DIST_SIZE * m.internal_size(),
+        };
+
+        SELF_SIZE + allocated_cost
}
}
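Since `SELF_SIZE` hard-codes the current 48-byte layout of `BucketValue`, a compile-time guard could catch layout drift at build time rather than in the cost test further down. A minimal sketch (hypothetical, not part of this PR; assumes const panics, Rust 1.57+):

```rust
// Hypothetical guard: fails compilation if BucketValue's size ever
// deviates from the hard-coded SELF_SIZE cost constant.
const _: () = assert!(std::mem::size_of::<BucketValue>() == 48);
```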

@@ -769,6 +783,11 @@ struct BucketKey {
tags: BTreeMap<String, String>,
}

+/// The memory cost of storing a string
+fn string_cost(s: &String) -> usize {
+    mem::size_of::<String>() + s.capacity()
+}
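As an illustration of `string_cost` (a sketch, assuming a 64-bit target, where the `String` header is 24 bytes for pointer, length, and capacity, and `String::from(&str)` allocates exactly the source length):

```rust
use std::mem;

fn string_cost(s: &String) -> usize {
    mem::size_of::<String>() + s.capacity()
}

fn main() {
    let s = String::from("hello");
    assert_eq!(string_cost(&s), 24 + 5); // header + 5 bytes of heap capacity
}
```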

impl BucketKey {
/// An extremely hamfisted way to hash a bucket key into an integer.
///
@@ -786,6 +805,30 @@
std::hash::Hash::hash(self, &mut hasher);
hasher.finalize() as i64
}

+    /// Estimates the number of bytes needed to encode the bucket key.
+    /// Note that this does not necessarily match the exact memory footprint of the key,
+    /// because datastructures might have a memory overhead.
+    fn cost(&self) -> usize {
+        // We use private constants here to make sure that a change in the implementation
+        // does not change the cost model without being noticed.
+        // Note that when the actual data type of any of the subkeys changes, we have to update
+        // the cost model and potentially increase the limits that are applied to it.
+        const PROJECT_KEY_SIZE: usize = 32;
+        const TIMESTAMP_SIZE: usize = mem::size_of::<u64>();
+        const METRIC_TYPE_SIZE: usize = mem::size_of::<u8>(); // small enum
+        const METRIC_UNIT_SIZE: usize = 16; // 1 (discriminator) + 15 (size of CustomUnit, the largest value)
+
+        PROJECT_KEY_SIZE
+            + TIMESTAMP_SIZE
+            + string_cost(&self.metric_name)
+            + METRIC_TYPE_SIZE
+            + METRIC_UNIT_SIZE
+            + self
+                .tags
+                .iter()
+                .fold(0, |acc, (k, v)| acc + string_cost(k) + string_cost(v))
+    }
}
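The `METRIC_UNIT_SIZE` comment relies on how Rust sizes enums: without niche optimizations, an enum occupies its largest variant plus the discriminant. A toy reproduction of the `1 + 15` reasoning (hypothetical type, not Relay's actual `MetricUnit`):

```rust
use std::mem;

// One discriminant byte plus a 15-byte align-1 payload gives 16 bytes,
// mirroring "1 (discriminator) + 15 (size of CustomUnit)" above.
#[allow(dead_code)]
enum ToyUnit {
    None,
    Custom([u8; 15]),
}

fn main() {
    assert_eq!(mem::size_of::<ToyUnit>(), 16);
}
```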

/// Parameters used by the [`Aggregator`].
Expand Down Expand Up @@ -1033,12 +1076,10 @@ enum AggregatorState {
ShuttingDown,
}

-#[derive(Debug, Default)]
+#[derive(Default)]
struct CostTracker {
total_cost: usize,
-    // Choosing a BTreeMap instead of a HashMap here, under the assumption that a BTreeMap
-    // is still more efficient for the number of project keys we store.
-    cost_per_project_key: BTreeMap<ProjectKey, usize>,
+    cost_per_project_key: HashMap<ProjectKey, usize>,
Member (author): I did not run any benchmarks, but figured that a HashMap is a safer choice w.r.t. scaling.

Member: It's fine either way; a BTreeMap might have an edge for small values.
}

impl CostTracker {
@@ -1050,12 +1091,12 @@

fn subtract_cost(&mut self, project_key: ProjectKey, cost: usize) {
match self.cost_per_project_key.entry(project_key) {
-            btree_map::Entry::Vacant(_) => {
+            Entry::Vacant(_) => {
relay_log::error!(
"Trying to subtract cost for a project key that has not been tracked"
);
}
-            btree_map::Entry::Occupied(mut entry) => {
+            Entry::Occupied(mut entry) => {
// Handle per-project cost:
let project_cost = entry.get_mut();
if cost > *project_cost {
@@ -1075,6 +1116,17 @@
}
}

+impl fmt::Debug for CostTracker {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut values: Vec<(&ProjectKey, &usize)> = self.cost_per_project_key.iter().collect();
+        values.sort();
+        f.debug_struct("CostTracker")
+            .field("total_cost", &self.total_cost)
+            .field("cost_per_project_key", &values)
+            .finish()
+    }
+}
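The hand-written `Debug` implementation replaces the derived one because `HashMap` iteration order is not deterministic: sorting the entries into a `Vec` keeps the insta snapshots below stable across runs, which is also why those snapshots change from `{...}` maps to `[...]` lists of pairs.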

/// A collector of [`Metric`] submissions.
///
/// # Aggregation
@@ -1287,7 +1339,7 @@ impl Aggregator {

let flush_at = self.config.get_flush_time(timestamp, project_key);
let bucket = value.into();
-            added_cost = bucket.cost();
+            added_cost = entry.key().cost() + bucket.cost();
Member: While at it, it would be good to deal with the Occupied branch too. See #1287 (comment).

Member (author): The Occupied branch is handled correctly (though not elegantly):

    let cost_before = bucket_value.cost();
    value.merge_into(bucket_value)?;
    let cost_after = bucket_value.cost();
    added_cost = cost_after.saturating_sub(cost_before);

The advantage of computing the cost twice and subtracting is that we only need a single function for computing bucket value cost. If we did something like `added_cost = if something_added { value.incremental_cost() } else { 0 }`, we would need to also implement an `incremental_cost` function on `BucketValue` and `MetricValue`.
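The measure-merge-measure pattern from this comment, as a standalone sketch (a `BTreeSet` stands in for the bucket value, with a simplified per-element cost; names are illustrative):

```rust
use std::collections::BTreeSet;
use std::mem;

// Price the container before and after the merge; saturating_sub makes a
// (theoretically impossible) shrink yield 0 instead of underflowing.
fn added_cost(set: &mut BTreeSet<u32>, value: u32) -> usize {
    let cost_before = mem::size_of::<u32>() * set.len();
    set.insert(value);
    let cost_after = mem::size_of::<u32>() * set.len();
    cost_after.saturating_sub(cost_before)
}

fn main() {
    let mut set = BTreeSet::new();
    assert_eq!(added_cost(&mut set, 123), 4); // new element: +4 bytes
    assert_eq!(added_cost(&mut set, 123), 0); // duplicate: no growth
}
```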

entry.insert(QueuedBucket::new(flush_at, bucket));
}
}
@@ -1388,8 +1440,8 @@ impl Aggregator {
self.buckets.retain(|key, entry| {
if force || entry.elapsed() {
// Take the value and leave a placeholder behind. It'll be removed right after.
-                let value = std::mem::replace(&mut entry.value, BucketValue::Counter(0.0));
-                cost_tracker.subtract_cost(key.project_key, value.cost());
+                let value = mem::replace(&mut entry.value, BucketValue::Counter(0.0));
+                cost_tracker.subtract_cost(key.project_key, key.cost() + value.cost());
Member: Can we call subtract_cost twice instead? Then overflow/underflow handling is also dealt with in one place.
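The suggestion, sketched against the lines above (hypothetical; the merged change keeps the single combined call):

```rust
// Two separate calls would keep all under/overflow handling inside
// subtract_cost, at the price of a second map lookup:
cost_tracker.subtract_cost(key.project_key, key.cost());
cost_tracker.subtract_cost(key.project_key, value.cost());
```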

let bucket = Bucket::from_parts(key.clone(), bucket_interval, value);
buckets.entry(key.project_key).or_default().push(bucket);

@@ -1967,20 +2019,59 @@ mod tests {

#[test]
fn test_bucket_value_cost() {
+        // This test ensures that the cost model for bucket values matches the actual cost of storing
+        // the data. When this test fails, update both the cost model and the dimensionality limits
+        // applied to it.
let counter = BucketValue::Counter(123.0);
-        assert_eq!(counter.cost(), 8);
-        let set = BucketValue::Set(vec![1, 2, 3, 4, 5].into_iter().collect());
-        assert_eq!(set.cost(), 20);
+        assert_eq!(counter.cost(), mem::size_of_val(&counter));
+        let set_entries = BTreeSet::<u32>::from([1, 2, 3, 4, 5]);
+        let set_entry_size = mem::size_of_val(set_entries.iter().next().unwrap());
+        let set = BucketValue::Set(set_entries);
+        assert_eq!(set.cost(), mem::size_of_val(&set) + 5 * set_entry_size);
let distribution = BucketValue::Distribution(dist![1., 2., 3.]);
-        assert_eq!(distribution.cost(), 36);
+        assert_eq!(
+            distribution.cost(),
+            mem::size_of_val(&distribution) + 3 * (mem::size_of::<f64>() + mem::size_of::<Count>())
+        );
let gauge = BucketValue::Gauge(GaugeValue {
max: 43.,
min: 42.,
sum: 85.,
last: 43.,
count: 2,
});
-        assert_eq!(gauge.cost(), 40);
+        assert_eq!(gauge.cost(), mem::size_of_val(&gauge));
}

+    #[test]
+    fn test_bucket_key_cost() {
+        let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap();
+        let name = "12345".to_owned();
+        let bucket_key = BucketKey {
+            project_key,
+            timestamp: UnixTimestamp::now(),
+            metric_name: name.clone(),
+            metric_type: MetricType::Counter,
+            metric_unit: MetricUnit::None,
+            tags: BTreeMap::from([
+                ("hello".to_owned(), "world".to_owned()),
+                ("answer".to_owned(), "42".to_owned()),
+            ]),
+        };
+        assert_eq!(
+            bucket_key.cost(),
+            mem::size_of_val(&project_key)
+                + mem::size_of::<UnixTimestamp>()
+                + string_cost(&name)
+                + mem::size_of::<MetricType>()
+                + mem::size_of::<MetricUnit>()
+                // Tag costs:
+                + 4 * 24
+                + 5
+                + 5
+                + 6
+                + 2
+        );
+    }
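For the tag costs in the assertion above: `4 * 24` covers the four `String` headers (24 bytes each on a 64-bit target), and `5 + 5 + 6 + 2` are the byte lengths of "hello", "world", "answer", and "42".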

#[test]
Expand Down Expand Up @@ -2168,63 +2259,84 @@ mod tests {
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 0,
-cost_per_project_key: {},
+cost_per_project_key: [],
}
"###);
cost_tracker.add_cost(project_key1, 100);
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 100,
-cost_per_project_key: {
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fed"): 100,
-},
+cost_per_project_key: [
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fed"),
+        100,
+    ),
+],
}
"###);
cost_tracker.add_cost(project_key2, 200);
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 300,
-cost_per_project_key: {
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fed"): 100,
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"): 200,
-},
+cost_per_project_key: [
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fed"),
+        100,
+    ),
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
+        200,
+    ),
+],
}
"###);
// Unknown project: Will log error, but not crash
cost_tracker.subtract_cost(project_key3, 666);
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 300,
-cost_per_project_key: {
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fed"): 100,
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"): 200,
-},
+cost_per_project_key: [
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fed"),
+        100,
+    ),
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
+        200,
+    ),
+],
}
"###);
// Subtract too much: Will log error, but not crash
cost_tracker.subtract_cost(project_key1, 666);
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 200,
-cost_per_project_key: {
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"): 200,
-},
+cost_per_project_key: [
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
+        200,
+    ),
+],
}
"###);
cost_tracker.subtract_cost(project_key2, 20);
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 180,
-cost_per_project_key: {
-    ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"): 180,
-},
+cost_per_project_key: [
+    (
+        ProjectKey("a94ae32be2584e0bbd7a4cbb95971fee"),
+        180,
+    ),
+],
}
"###);
cost_tracker.subtract_cost(project_key2, 180);
insta::assert_debug_snapshot!(cost_tracker, @r###"
CostTracker {
total_cost: 0,
-cost_per_project_key: {},
+cost_per_project_key: [],
}
"###);
}
@@ -2243,22 +2355,38 @@
timestamp: UnixTimestamp::from_secs(999994711),
tags: BTreeMap::new(),
};
-        for (metric_value, expected_total_cost) in [
-            (MetricValue::Counter(42.), 8),
-            (MetricValue::Counter(42.), 8),       // counters have constant size
-            (MetricValue::Set(123), 12),          // 8 + 1*4
-            (MetricValue::Set(123), 12),          // Same element in set, no change
-            (MetricValue::Set(456), 16),          // Different element in set -> +4
-            (MetricValue::Distribution(1.0), 28), // 1 unique element -> +12
-            (MetricValue::Distribution(1.0), 28), // no new element
-            (MetricValue::Distribution(2.0), 40), // 1 new element -> +12
-            (MetricValue::Gauge(0.3), 80),
-            (MetricValue::Gauge(0.2), 80),        // gauge has constant size
+        let bucket_key = BucketKey {
+            project_key,
+            timestamp: UnixTimestamp::now(),
+            metric_name: "c:foo".to_owned(),
+            metric_type: MetricType::Counter,
+            metric_unit: MetricUnit::None,
+            tags: BTreeMap::new(),
+        };
+        let fixed_cost = bucket_key.cost() + mem::size_of::<BucketValue>();
+        for (metric_value, expected_added_cost) in [
+            (MetricValue::Counter(42.), fixed_cost),
+            (MetricValue::Counter(42.), 0),                    // counters have constant size
+            (MetricValue::Set(123), fixed_cost + 4),           // Added a new bucket + 1 element
+            (MetricValue::Set(123), 0),                        // Same element in set, no change
+            (MetricValue::Set(456), 4),                        // Different element in set -> +4
+            (MetricValue::Distribution(1.0), fixed_cost + 12), // New bucket + 1 element
+            (MetricValue::Distribution(1.0), 0),               // no new element
+            (MetricValue::Distribution(2.0), 12),              // 1 new element
+            (MetricValue::Gauge(0.3), fixed_cost),             // New bucket
+            (MetricValue::Gauge(0.2), 0),                      // gauge has constant size
] {
metric.value = metric_value;
+            let current_cost = aggregator.cost_tracker.total_cost;
aggregator.insert(project_key, metric.clone()).unwrap();
-            assert_eq!(aggregator.cost_tracker.total_cost, expected_total_cost);
+            assert_eq!(
+                aggregator.cost_tracker.total_cost,
+                current_cost + expected_added_cost
+            );
}

+        aggregator.pop_flush_buckets();
+        assert_eq!(aggregator.cost_tracker.total_cost, 0);
}
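Here `fixed_cost` is the bucket key's cost plus the 48-byte `BucketValue` struct. Each metric type pays it once when its bucket is first created; subsequent merges only add the cost of newly allocated set or distribution entries, and the final `pop_flush_buckets()` call drains all buckets, returning the tracker to zero.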
