finishing touches

rerun-io · Apr 12, 2023 · 08a5837 · 08a5837 · github-actions · Apr 12, 2023
1 parent 25d044b
commit 08a5837
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 4 deletions.
diff --git a/crates/re_arrow_store/src/store.rs b/crates/re_arrow_store/src/store.rs
@@ -23,8 +23,13 @@ pub struct DataStoreConfig {
     /// to a specific timeline _and_ a specific entity.
     ///
     /// This effectively puts an upper bound on the number of rows that need to be sorted when an
-    /// indexed bucket gets out of order.
+    /// indexed bucket gets out of order (e.g. because of new insertions or a GC pass).
     /// This is a tradeoff: fewer rows means faster sorts at the cost of more metadata overhead.
+    /// In particular:
+    /// - Query performance scales inversely logarithmically to this number (i.e. it gets better
+    ///   the higher this number gets).
+    /// - GC cost scales quadratically with this number (i.e. GC performance gets better the
+    ///   lower this number gets).
     ///
     /// See [`Self::DEFAULT`] for defaults.
     pub indexed_bucket_num_rows: u64,

diff --git a/crates/re_arrow_store/src/store_gc.rs b/crates/re_arrow_store/src/store_gc.rs
@@ -61,6 +61,8 @@ impl DataStore {
     //
     // TODO(#1803): The GC should be aware of latest-at semantics and make sure they are upheld
     // when purging data.
+    //
+    // TODO(#1823): Workload specific optimizations.
     pub fn gc(&mut self, target: GarbageCollectionTarget) -> (Vec<RowId>, DataStoreStats) {
         crate::profile_function!();
 

diff --git a/crates/re_arrow_store/src/store_write.rs b/crates/re_arrow_store/src/store_write.rs
@@ -244,9 +244,10 @@ impl MetadataRegistry<TimePoint> {
             std::collections::btree_map::Entry::Occupied(mut entry) => {
                 let entry = entry.get_mut();
                 for (timeline, time) in timepoint {
-                    let overwritten = entry.insert(timeline, time).is_some();
-                    re_log::error!(%row_id, ?timeline, ?time, "detected re-used `RowId/Timeline` pair, this is illegal and will lead to undefined behavior in the datastore");
-                    debug_assert!(!overwritten);
+                    if let Some(old_time) = entry.insert(timeline, time) {
+                        re_log::error!(%row_id, ?timeline, old_time = ?old_time, new_time = ?time, "detected re-used `RowId/Timeline` pair, this is illegal and will lead to undefined behavior in the datastore");
+                        debug_assert!(false, "detected re-used `RowId/Timeline`");
+                    }
                 }
             }
         }
Benchmark suite	Current: `08a5837`	Previous: `f7cdc66`	Ratio
`datastore/num_rows=1000/num_instances=1000/packed=false/insert/default`	`3401875` ns/iter (`± 24520`)	`12647423` ns/iter (`± 771443`)	`0.27`
`datastore/num_rows=1000/num_instances=1000/packed=false/latest_at/default`	`372` ns/iter (`± 1`)	`1832` ns/iter (`± 20`)	`0.20`
`datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/primary/default`	`260` ns/iter (`± 0`)	`287` ns/iter (`± 0`)	`0.91`
`datastore/num_rows=1000/num_instances=1000/packed=false/latest_at_missing/secondaries/default`	`432` ns/iter (`± 3`)	`437` ns/iter (`± 0`)	`0.99`
`datastore/num_rows=1000/num_instances=1000/packed=false/range/default`	`3507951` ns/iter (`± 27063`)	`12619455` ns/iter (`± 957541`)	`0.28`
`datastore/num_rows=1000/num_instances=1000/packed=false/gc/default`	`2404344` ns/iter (`± 12855`)
`mono_points_arrow/generate_message_bundles`	`25873782` ns/iter (`± 777249`)	`49349168` ns/iter (`± 956671`)	`0.52`
`mono_points_arrow/generate_messages`	`111442502` ns/iter (`± 922220`)	`170422381` ns/iter (`± 1445254`)	`0.65`
`mono_points_arrow/encode_log_msg`	`140999402` ns/iter (`± 880142`)	`197428348` ns/iter (`± 1536666`)	`0.71`
`mono_points_arrow/encode_total`	`281115846` ns/iter (`± 1209786`)	`418867271` ns/iter (`± 2157925`)	`0.67`
`mono_points_arrow/decode_log_msg`	`176807799` ns/iter (`± 658839`)	`250596891` ns/iter (`± 1424614`)	`0.71`
`mono_points_arrow/decode_message_bundles`	`56695290` ns/iter (`± 886416`)	`85571845` ns/iter (`± 1184302`)	`0.66`
`mono_points_arrow/decode_total`	`238784517` ns/iter (`± 1149697`)	`342919689` ns/iter (`± 2500778`)	`0.70`
`mono_points_arrow_batched/generate_message_bundles`	`19591871` ns/iter (`± 988022`)	`43919041` ns/iter (`± 1367501`)	`0.45`
`mono_points_arrow_batched/generate_messages`	`4036393` ns/iter (`± 58773`)	`9270537` ns/iter (`± 989843`)	`0.44`
`mono_points_arrow_batched/encode_log_msg`	`1367548` ns/iter (`± 6796`)	`1476663` ns/iter (`± 7486`)	`0.93`
`mono_points_arrow_batched/encode_total`	`27253186` ns/iter (`± 1120530`)	`54538211` ns/iter (`± 1869911`)	`0.50`
`mono_points_arrow_batched/decode_log_msg`	`778369` ns/iter (`± 2796`)	`855861` ns/iter (`± 3205`)	`0.91`
`mono_points_arrow_batched/decode_message_bundles`	`7607647` ns/iter (`± 67974`)	`13096938` ns/iter (`± 880697`)	`0.58`
`mono_points_arrow_batched/decode_total`	`8433239` ns/iter (`± 146384`)	`14899865` ns/iter (`± 1022613`)	`0.57`
`batch_points_arrow/generate_message_bundles`	`238590` ns/iter (`± 1323`)	`333770` ns/iter (`± 574`)	`0.71`
`batch_points_arrow/generate_messages`	`4984` ns/iter (`± 27`)	`6465` ns/iter (`± 17`)	`0.77`
`batch_points_arrow/encode_log_msg`	`256623` ns/iter (`± 1015`)	`398721` ns/iter (`± 1813`)	`0.64`
`batch_points_arrow/encode_total`	`528004` ns/iter (`± 2800`)	`762505` ns/iter (`± 3700`)	`0.69`
`batch_points_arrow/decode_log_msg`	`212020` ns/iter (`± 961`)	`351797` ns/iter (`± 1023`)	`0.60`
`batch_points_arrow/decode_message_bundles`	`1832` ns/iter (`± 21`)	`2327` ns/iter (`± 7`)	`0.79`
`batch_points_arrow/decode_total`	`215553` ns/iter (`± 1182`)	`360113` ns/iter (`± 1637`)	`0.60`
`arrow_mono_points/insert`	`2259241341` ns/iter (`± 28356051`)	`7424209949` ns/iter (`± 17492043`)	`0.30`
`arrow_mono_points/query`	`1641622` ns/iter (`± 14580`)	`1808539` ns/iter (`± 17588`)	`0.91`
`arrow_batch_points/insert`	`1152237` ns/iter (`± 3673`)	`3188138` ns/iter (`± 24782`)	`0.36`
`arrow_batch_points/query`	`16867` ns/iter (`± 122`)	`16354` ns/iter (`± 25`)	`1.03`
`arrow_batch_vecs/insert`	`26403` ns/iter (`± 104`)	`44563` ns/iter (`± 71`)	`0.59`
`arrow_batch_vecs/query`	`387891` ns/iter (`± 2010`)	`389254` ns/iter (`± 4163`)	`1.00`
`tuid/Tuid::random`	`34` ns/iter (`± 0`)	`34` ns/iter (`± 0`)	`1`