Skip to content

Commit

Permalink
Fix formatting of RowId/Tuid when printing ChunkStore (#8656)
Browse files Browse the repository at this point in the history
Also added a regression test

* Part of #3741

---------

Co-authored-by: Clement Rey <[email protected]>
  • Loading branch information
emilk and teh-cmc authored Jan 13, 2025
1 parent c63e6cf commit 92949cf
Show file tree
Hide file tree
Showing 12 changed files with 129 additions and 43 deletions.
14 changes: 11 additions & 3 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3885,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]

[[package]]
Expand Down Expand Up @@ -5203,7 +5203,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
dependencies = [
"bytes",
"heck 0.4.1",
"heck 0.5.0",
"itertools 0.13.0",
"log",
"multimap",
Expand Down Expand Up @@ -5686,6 +5686,7 @@ dependencies = [
"re_types_core",
"serde",
"similar-asserts",
"tap",
"thiserror 1.0.65",
]

Expand Down Expand Up @@ -5980,6 +5981,7 @@ name = "re_format_arrow"
version = "0.22.0-alpha.1+dev"
dependencies = [
"comfy-table",
"itertools 0.13.0",
"re_arrow2",
"re_tuid",
"re_types_core",
Expand Down Expand Up @@ -8483,6 +8485,12 @@ dependencies = [
"libc",
]

[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"

[[package]]
name = "target-lexicon"
version = "0.12.16"
Expand Down Expand Up @@ -9848,7 +9856,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]

[[package]]
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ strum_macros = "0.26"
sublime_fuzzy = "0.7"
syn = "2.0"
sysinfo = { version = "0.30.1", default-features = false }
tap = "1.0.1"
tempfile = "3.0"
thiserror = "1.0"
time = { version = "0.3.36", default-features = false, features = [
Expand Down
1 change: 1 addition & 0 deletions crates/store/re_chunk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ itertools.workspace = true
nohash-hasher.workspace = true
rand = { workspace = true, features = ["std_rng"] }
similar-asserts.workspace = true
tap.workspace = true
thiserror.workspace = true

# Optional dependencies:
Expand Down
4 changes: 2 additions & 2 deletions crates/store/re_chunk/src/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,11 +1355,11 @@ impl Chunk {
impl std::fmt::Display for Chunk {
#[inline]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let chunk = self.to_transport().map_err(|err| {
let transport_chunk = self.to_transport().map_err(|err| {
re_log::error_once!("couldn't display Chunk: {err}");
std::fmt::Error
})?;
chunk.fmt(f)
transport_chunk.fmt(f)
}
}

Expand Down
11 changes: 10 additions & 1 deletion crates/store/re_chunk/src/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use nohash_hasher::IntMap;
use re_byte_size::SizeBytes as _;
use re_log_types::{EntityPath, Timeline};
use re_types_core::{Component as _, ComponentDescriptor, Loggable as _};
use tap::Tap as _;

use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, RowId, TimeColumn};

Expand Down Expand Up @@ -434,7 +435,15 @@ impl Chunk {
row_ids.data_type().clone(),
false,
)
.with_metadata(TransportChunk::field_metadata_control_column()),
.with_metadata(
TransportChunk::field_metadata_control_column().tap_mut(|metadata| {
// This ensures the RowId/Tuid is formatted correctly
metadata.insert(
"ARROW:extension:name".to_owned(),
re_tuid::Tuid::ARROW_EXTENSION_NAME.to_owned(),
);
}),
),
);
columns.push(row_ids.clone().boxed());
}
Expand Down
48 changes: 48 additions & 0 deletions crates/store/re_chunk_store/tests/formatting.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use std::sync::Arc;

use re_chunk::{Chunk, ChunkId, RowId};
use re_chunk_store::ChunkStore;
use re_log_types::{
build_frame_nr, build_log_time,
example_components::{MyColor, MyIndex},
EntityPath, Time,
};
use re_types_core::ComponentBatch as _;

/// Ensure that `ChunkStore::to_string()` is nice and readable.
#[test]
fn format_chunk_store() -> anyhow::Result<()> {
re_log::setup_logging();

let mut store = ChunkStore::new(
re_log_types::StoreId::from_string(
re_log_types::StoreKind::Recording,
"test_id".to_owned(),
),
Default::default(),
);

let entity_path = EntityPath::from("this/that");

let (indices1, colors1) = (MyIndex::from_iter(0..3), MyColor::from_iter(0..3));

let chunk_id = ChunkId::from_u128(123_456_789_123_456_789_123_456_789);
let row_id = RowId::from_u128(32_033_410_000_000_000_000_000_000_123);

store.insert_chunk(&Arc::new(
Chunk::builder_with_id(chunk_id, entity_path.clone())
.with_serialized_batches(
row_id,
[
build_frame_nr(1),
build_log_time(Time::from_ns_since_epoch(1_736_534_622_123_456_789)),
],
[indices1.try_serialized()?, colors1.try_serialized()?],
)
.build()?,
))?;

insta::assert_snapshot!("format_chunk_store", store.to_string());

Ok(())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
source: crates/store/re_chunk_store/tests/formatting.rs
expression: store.to_string()
---
ChunkStore {
id: test_id
config: ChunkStoreConfig { enable_changelog: true, chunk_max_bytes: 393216, chunk_max_rows: 4096, chunk_max_rows_if_unsorted: 1024 }
stats: {
num_chunks: 1
total_size_bytes: 1.0 KiB
num_rows: 1
num_events: 2
}
chunks: [
┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
CHUNK METADATA: │
* entity_path: "/this/that"
* heap_size_bytes: "838"
* id: "661EFDF2E3B19F7C045F15"
* is_sorted: ""
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ┌──────────────────────────────────┬───────────────┬───────────────────────────────┬───────────────────┬───────────────────┐ │
│ │ RowIdframe_nrlog_timeexample.MyColorexample.MyIndex │ │
│ │ --------------- │ │
│ │ type: "struct[2]"type: "i64"type: "timestamp(ns)"type: "list[u32]"type: "list[u64]" │ │
│ │ ARROW:extension:name: "TUID"is_sorted: ""is_sorted: ""kind: "data"kind: "data" │ │
│ │ kind: "control"kind: "time"kind: "time" ┆ ┆ │ │
│ ╞══════════════════════════════════╪═══════════════╪═══════════════════════════════╪═══════════════════╪═══════════════════╡ │
│ │ 0000000067816A6BB4B8C1254D40007B12025-01-10 18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
│ └──────────────────────────────────┴───────────────┴───────────────────────────────┴───────────────────┴───────────────────┘ │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
]
}
1 change: 1 addition & 0 deletions crates/store/re_format_arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ all-features = true

[dependencies]
arrow2.workspace = true
itertools.workspace = true
re_tuid.workspace = true
re_types_core.workspace = true # tuid serialization

Expand Down
41 changes: 9 additions & 32 deletions crates/store/re_format_arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,13 @@ use re_types_core::Loggable as _;
type CustomFormatter<'a, F> = Box<dyn Fn(&mut F, usize) -> std::fmt::Result + 'a>;

fn get_custom_display<'a, F: std::fmt::Write + 'a>(
field: &Field,
array: &'a dyn Array,
null: &'static str,
) -> CustomFormatter<'a, F> {
// NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're
// interested in the type of the array that's underneath.
let datatype = (|| match array.data_type().to_logical_type() {
DataType::List(_) => array
.as_any()
.downcast_ref::<ListArray<i32>>()?
.iter()
.next()?
.map(|array| array.data_type().clone()),
_ => Some(array.data_type().clone()),
})();

if let Some(DataType::Extension(name, _, _)) = datatype {
if let Some(extension_name) = field.metadata.get("ARROW:extension:name") {
// TODO(#1775): This should be registered dynamically.
if name.as_str() == Tuid::NAME {
if extension_name.as_str() == Tuid::ARROW_EXTENSION_NAME {
return Box::new(|w, index| {
if let Some(tuid) = parse_tuid(array, index) {
w.write_fmt(format_args!("{tuid}"))
Expand Down Expand Up @@ -244,21 +233,10 @@ where

outer_table.add_row({
let mut row = Row::new();
row.add_cell({
let cell = Cell::new(format!(
"CHUNK METADATA:\n{}",
DisplayMetadata(metadata, "* ")
));

#[cfg(not(target_arch = "wasm32"))] // requires TTY
{
cell.add_attribute(comfy_table::Attribute::Italic)
}
#[cfg(target_arch = "wasm32")]
{
cell
}
});
row.add_cell(Cell::new(format!(
"CHUNK METADATA:\n{}",
DisplayMetadata(metadata, "* ")
)));
row
});

Expand All @@ -280,9 +258,8 @@ where
});
table.set_header(header);

let displays = columns
.iter()
.map(|array| get_custom_display(&**array, "-"))
let displays = itertools::izip!(fields.iter(), columns.iter())
.map(|(field, array)| get_custom_display(field, &**array, "-"))
.collect::<Vec<_>>();
let num_rows = columns.first().map_or(0, |list_array| list_array.len());

Expand Down
14 changes: 11 additions & 3 deletions crates/store/re_log_encoding/src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ mod tests {
]
}

// TODO(#3741): should not be needed once the migration from arrow2 is complete
fn clear_arrow_extension_metadata(messages: &mut Vec<LogMsg>) {
for msg in messages {
if let LogMsg::ArrowMsg(_, arrow_msg) = msg {
Expand All @@ -472,7 +473,7 @@ mod tests {
fn test_encode_decode() {
let rrd_version = CrateVersion::LOCAL;

let messages = fake_log_messages();
let mut messages = fake_log_messages();

let options = [
EncodingOptions {
Expand Down Expand Up @@ -503,9 +504,14 @@ mod tests {
.collect::<Result<Vec<LogMsg>, DecodeError>>()
.unwrap();

// TODO(#3741): should not be needed once the migration from arrow2 is complete
clear_arrow_extension_metadata(&mut messages);
clear_arrow_extension_metadata(&mut decoded_messages);

assert_eq!(messages, decoded_messages);
assert!(
messages == decoded_messages,
"Got: {decoded_messages:#?}, expected: {messages:#?}"
);
}
}

Expand Down Expand Up @@ -536,7 +542,7 @@ mod tests {
let mut data = vec![];

// write "2 files" i.e. 2 streams that end with end-of-stream marker
let messages = fake_log_messages();
let mut messages = fake_log_messages();

// (2 encoders as each encoder writes a file header)
let writer = std::io::Cursor::new(&mut data);
Expand Down Expand Up @@ -565,6 +571,8 @@ mod tests {

let mut decoded_messages = decoder.into_iter().collect::<Result<Vec<_>, _>>().unwrap();

// TODO(#3741): should not be needed once the migration from arrow2 is complete
clear_arrow_extension_metadata(&mut messages);
clear_arrow_extension_metadata(&mut decoded_messages);

assert_eq!([messages.clone(), messages].concat(), decoded_messages);
Expand Down
2 changes: 1 addition & 1 deletion crates/store/re_types_core/src/tuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl Loggable for Tuid {
Self: 'a,
{
Err(crate::SerializationError::not_implemented(
Self::NAME,
Self::ARROW_EXTENSION_NAME,
"TUIDs are never nullable, use `to_arrow()` instead",
))
}
Expand Down
2 changes: 1 addition & 1 deletion crates/utils/re_tuid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl Tuid {
/// We give an actual name to [`Tuid`], and inject that name into the Arrow datatype extensions,
/// as a hack so that we can compactly format them when printing Arrow data to the terminal.
/// Check out `re_format_arrow` for context.
pub const NAME: &'static str = "rerun.datatypes.TUID";
pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID";
}

impl std::fmt::Display for Tuid {
Expand Down

0 comments on commit 92949cf

Please sign in to comment.