From 92949cfaacaca25def9159b0ee332f325abfdbe6 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 13 Jan 2025 10:58:42 +0100 Subject: [PATCH] Fix formatting of RowId/Tuid when printing ChunkStore (#8656) Also added a regression test * Part of #3741 --------- Co-authored-by: Clement Rey --- Cargo.lock | 14 ++++-- Cargo.toml | 1 + crates/store/re_chunk/Cargo.toml | 1 + crates/store/re_chunk/src/chunk.rs | 4 +- crates/store/re_chunk/src/transport.rs | 11 ++++- .../store/re_chunk_store/tests/formatting.rs | 48 +++++++++++++++++++ .../formatting__format_chunk_store.snap | 33 +++++++++++++ crates/store/re_format_arrow/Cargo.toml | 1 + crates/store/re_format_arrow/src/lib.rs | 41 ++++------------ .../store/re_log_encoding/src/decoder/mod.rs | 14 ++++-- crates/store/re_types_core/src/tuid.rs | 2 +- crates/utils/re_tuid/src/lib.rs | 2 +- 12 files changed, 129 insertions(+), 43 deletions(-) create mode 100644 crates/store/re_chunk_store/tests/formatting.rs create mode 100644 crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap diff --git a/Cargo.lock b/Cargo.lock index 61be28db6ef1..3879fb612337 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3885,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -5203,7 +5203,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", - "heck 0.4.1", + "heck 0.5.0", "itertools 0.13.0", "log", "multimap", @@ -5686,6 +5686,7 @@ dependencies = [ "re_types_core", "serde", "similar-asserts", + "tap", "thiserror 1.0.65", ] @@ -5980,6 +5981,7 @@ name = "re_format_arrow" version = "0.22.0-alpha.1+dev" dependencies = [ "comfy-table", + "itertools 0.13.0", "re_arrow2", "re_tuid", "re_types_core", @@ -8483,6 +8485,12 @@ dependencies = [ "libc", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "target-lexicon" version = "0.12.16" @@ -9848,7 +9856,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 28e8da64fc1f..7f5ca6092ce4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -269,6 +269,7 @@ strum_macros = "0.26" sublime_fuzzy = "0.7" syn = "2.0" sysinfo = { version = "0.30.1", default-features = false } +tap = "1.0.1" tempfile = "3.0" thiserror = "1.0" time = { version = "0.3.36", default-features = false, features = [ diff --git a/crates/store/re_chunk/Cargo.toml b/crates/store/re_chunk/Cargo.toml index 4e091da8a29e..38a9e5b8c963 100644 --- a/crates/store/re_chunk/Cargo.toml +++ b/crates/store/re_chunk/Cargo.toml @@ -63,6 +63,7 @@ itertools.workspace = true nohash-hasher.workspace = true rand = { workspace = true, features = ["std_rng"] } similar-asserts.workspace = true +tap.workspace = true thiserror.workspace = true # Optional dependencies: diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs index eb0bc8045b0e..b7032f6acf14 100644 --- a/crates/store/re_chunk/src/chunk.rs +++ b/crates/store/re_chunk/src/chunk.rs @@ -1355,11 +1355,11 @@ impl Chunk { impl std::fmt::Display for Chunk { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let chunk = self.to_transport().map_err(|err| { + let transport_chunk = self.to_transport().map_err(|err| { re_log::error_once!("couldn't display Chunk: {err}"); std::fmt::Error })?; - chunk.fmt(f) + transport_chunk.fmt(f) } } diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index e603ad9fd856..69350f685d71 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -13,6 +13,7 @@ use nohash_hasher::IntMap; use re_byte_size::SizeBytes as _; use re_log_types::{EntityPath, Timeline}; use re_types_core::{Component as _, ComponentDescriptor, Loggable as _}; +use tap::Tap as _; use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, RowId, TimeColumn}; @@ -434,7 +435,15 @@ impl Chunk { row_ids.data_type().clone(), false, ) - .with_metadata(TransportChunk::field_metadata_control_column()), + .with_metadata( + TransportChunk::field_metadata_control_column().tap_mut(|metadata| { + // This ensures the RowId/Tuid is formatted correctly + metadata.insert( + "ARROW:extension:name".to_owned(), + re_tuid::Tuid::ARROW_EXTENSION_NAME.to_owned(), + ); + }), + ), ); columns.push(row_ids.clone().boxed()); } diff --git a/crates/store/re_chunk_store/tests/formatting.rs b/crates/store/re_chunk_store/tests/formatting.rs new file mode 100644 index 000000000000..e71fbffdbd1c --- /dev/null +++ b/crates/store/re_chunk_store/tests/formatting.rs @@ -0,0 +1,48 @@ +use std::sync::Arc; + +use re_chunk::{Chunk, ChunkId, RowId}; +use re_chunk_store::ChunkStore; +use re_log_types::{ + build_frame_nr, build_log_time, + example_components::{MyColor, MyIndex}, + EntityPath, Time, +}; +use re_types_core::ComponentBatch as _; + +/// Ensure that `ChunkStore::to_string()` is nice and readable. +#[test] +fn format_chunk_store() -> anyhow::Result<()> { + re_log::setup_logging(); + + let mut store = ChunkStore::new( + re_log_types::StoreId::from_string( + re_log_types::StoreKind::Recording, + "test_id".to_owned(), + ), + Default::default(), + ); + + let entity_path = EntityPath::from("this/that"); + + let (indices1, colors1) = (MyIndex::from_iter(0..3), MyColor::from_iter(0..3)); + + let chunk_id = ChunkId::from_u128(123_456_789_123_456_789_123_456_789); + let row_id = RowId::from_u128(32_033_410_000_000_000_000_000_000_123); + + store.insert_chunk(&Arc::new( + Chunk::builder_with_id(chunk_id, entity_path.clone()) + .with_serialized_batches( + row_id, + [ + build_frame_nr(1), + build_log_time(Time::from_ns_since_epoch(1_736_534_622_123_456_789)), + ], + [indices1.try_serialized()?, colors1.try_serialized()?], + ) + .build()?, + ))?; + + insta::assert_snapshot!("format_chunk_store", store.to_string()); + + Ok(()) +} diff --git a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap new file mode 100644 index 000000000000..f8edb7be14e0 --- /dev/null +++ b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap @@ -0,0 +1,33 @@ +--- +source: crates/store/re_chunk_store/tests/formatting.rs +expression: store.to_string() +--- +ChunkStore { + id: test_id + config: ChunkStoreConfig { enable_changelog: true, chunk_max_bytes: 393216, chunk_max_rows: 4096, chunk_max_rows_if_unsorted: 1024 } + stats: { + num_chunks: 1 + total_size_bytes: 1.0 KiB + num_rows: 1 + num_events: 2 + } + chunks: [ + ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ + │ CHUNK METADATA: │ + │ * entity_path: "/this/that" │ + │ * heap_size_bytes: "838" │ + │ * id: "661EFDF2E3B19F7C045F15" │ + │ * is_sorted: "" │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ ┌──────────────────────────────────┬───────────────┬───────────────────────────────┬───────────────────┬───────────────────┐ │ + │ │ RowId ┆ frame_nr ┆ log_time ┆ example.MyColor ┆ example.MyIndex │ │ + │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ + │ │ type: "struct[2]" ┆ type: "i64" ┆ type: "timestamp(ns)" ┆ type: "list[u32]" ┆ type: "list[u64]" │ │ + │ │ ARROW:extension:name: "TUID" ┆ is_sorted: "" ┆ is_sorted: "" ┆ kind: "data" ┆ kind: "data" │ │ + │ │ kind: "control" ┆ kind: "time" ┆ kind: "time" ┆ ┆ │ │ + │ ╞══════════════════════════════════╪═══════════════╪═══════════════════════════════╪═══════════════════╪═══════════════════╡ │ + │ │ 0000000067816A6BB4B8C1254D40007B ┆ 1 ┆ 2025-01-10 18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ + │ └──────────────────────────────────┴───────────────┴───────────────────────────────┴───────────────────┴───────────────────┘ │ + └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + ] +} diff --git a/crates/store/re_format_arrow/Cargo.toml b/crates/store/re_format_arrow/Cargo.toml index e752139d70b2..f2d76dd0f8e4 100644 --- a/crates/store/re_format_arrow/Cargo.toml +++ b/crates/store/re_format_arrow/Cargo.toml @@ -21,6 +21,7 @@ all-features = true [dependencies] arrow2.workspace = true +itertools.workspace = true re_tuid.workspace = true re_types_core.workspace = true # tuid serialization diff --git a/crates/store/re_format_arrow/src/lib.rs b/crates/store/re_format_arrow/src/lib.rs index 22ebebbb89f5..a6b143053067 100644 --- a/crates/store/re_format_arrow/src/lib.rs +++ b/crates/store/re_format_arrow/src/lib.rs @@ -21,24 +21,13 @@ use re_types_core::Loggable as _; type CustomFormatter<'a, F> = Box std::fmt::Result + 'a>; fn get_custom_display<'a, F: std::fmt::Write + 'a>( + field: &Field, array: &'a dyn Array, null: &'static str, ) -> CustomFormatter<'a, F> { - // NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're - // interested in the type of the array that's underneath. - let datatype = (|| match array.data_type().to_logical_type() { - DataType::List(_) => array - .as_any() - .downcast_ref::>()? - .iter() - .next()? - .map(|array| array.data_type().clone()), - _ => Some(array.data_type().clone()), - })(); - - if let Some(DataType::Extension(name, _, _)) = datatype { + if let Some(extension_name) = field.metadata.get("ARROW:extension:name") { // TODO(#1775): This should be registered dynamically. - if name.as_str() == Tuid::NAME { + if extension_name.as_str() == Tuid::ARROW_EXTENSION_NAME { return Box::new(|w, index| { if let Some(tuid) = parse_tuid(array, index) { w.write_fmt(format_args!("{tuid}")) @@ -244,21 +233,10 @@ where outer_table.add_row({ let mut row = Row::new(); - row.add_cell({ - let cell = Cell::new(format!( - "CHUNK METADATA:\n{}", - DisplayMetadata(metadata, "* ") - )); - - #[cfg(not(target_arch = "wasm32"))] // requires TTY - { - cell.add_attribute(comfy_table::Attribute::Italic) - } - #[cfg(target_arch = "wasm32")] - { - cell - } - }); + row.add_cell(Cell::new(format!( + "CHUNK METADATA:\n{}", + DisplayMetadata(metadata, "* ") + ))); row }); @@ -280,9 +258,8 @@ where }); table.set_header(header); - let displays = columns - .iter() - .map(|array| get_custom_display(&**array, "-")) + let displays = itertools::izip!(fields.iter(), columns.iter()) + .map(|(field, array)| get_custom_display(field, &**array, "-")) .collect::>(); let num_rows = columns.first().map_or(0, |list_array| list_array.len()); diff --git a/crates/store/re_log_encoding/src/decoder/mod.rs b/crates/store/re_log_encoding/src/decoder/mod.rs index 224bc03ceef2..50f6517f9a75 100644 --- a/crates/store/re_log_encoding/src/decoder/mod.rs +++ b/crates/store/re_log_encoding/src/decoder/mod.rs @@ -456,6 +456,7 @@ mod tests { ] } + // TODO(#3741): should not be needed once the migration from arrow2 is complete fn clear_arrow_extension_metadata(messages: &mut Vec) { for msg in messages { if let LogMsg::ArrowMsg(_, arrow_msg) = msg { @@ -472,7 +473,7 @@ mod tests { fn test_encode_decode() { let rrd_version = CrateVersion::LOCAL; - let messages = fake_log_messages(); + let mut messages = fake_log_messages(); let options = [ EncodingOptions { @@ -503,9 +504,14 @@ mod tests { .collect::, DecodeError>>() .unwrap(); + // TODO(#3741): should not be needed once the migration from arrow2 is complete + clear_arrow_extension_metadata(&mut messages); clear_arrow_extension_metadata(&mut decoded_messages); - assert_eq!(messages, decoded_messages); + assert!( + messages == decoded_messages, + "Got: {decoded_messages:#?}, expected: {messages:#?}" + ); } } @@ -536,7 +542,7 @@ mod tests { let mut data = vec![]; // write "2 files" i.e. 2 streams that end with end-of-stream marker - let messages = fake_log_messages(); + let mut messages = fake_log_messages(); // (2 encoders as each encoder writes a file header) let writer = std::io::Cursor::new(&mut data); @@ -565,6 +571,8 @@ mod tests { let mut decoded_messages = decoder.into_iter().collect::, _>>().unwrap(); + // TODO(#3741): should not be needed once the migration from arrow2 is complete + clear_arrow_extension_metadata(&mut messages); clear_arrow_extension_metadata(&mut decoded_messages); assert_eq!([messages.clone(), messages].concat(), decoded_messages); diff --git a/crates/store/re_types_core/src/tuid.rs b/crates/store/re_types_core/src/tuid.rs index 5d0e5afa42b0..622805a8e353 100644 --- a/crates/store/re_types_core/src/tuid.rs +++ b/crates/store/re_types_core/src/tuid.rs @@ -27,7 +27,7 @@ impl Loggable for Tuid { Self: 'a, { Err(crate::SerializationError::not_implemented( - Self::NAME, + Self::ARROW_EXTENSION_NAME, "TUIDs are never nullable, use `to_arrow()` instead", )) } diff --git a/crates/utils/re_tuid/src/lib.rs b/crates/utils/re_tuid/src/lib.rs index bcfdaa86aeee..f215f1da3477 100644 --- a/crates/utils/re_tuid/src/lib.rs +++ b/crates/utils/re_tuid/src/lib.rs @@ -23,7 +23,7 @@ impl Tuid { /// We give an actual name to [`Tuid`], and inject that name into the Arrow datatype extensions, /// as a hack so that we can compactly format them when printing Arrow data to the terminal. /// Check out `re_format_arrow` for context. - pub const NAME: &'static str = "rerun.datatypes.TUID"; + pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID"; } impl std::fmt::Display for Tuid {