Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix formatting of RowId/Tuid when printing ChunkStore #8656

Merged
merged 6 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3885,7 +3885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]

[[package]]
Expand Down Expand Up @@ -5203,7 +5203,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
dependencies = [
"bytes",
"heck 0.4.1",
"heck 0.5.0",
"itertools 0.13.0",
"log",
"multimap",
Expand Down Expand Up @@ -5686,6 +5686,7 @@ dependencies = [
"re_types_core",
"serde",
"similar-asserts",
"tap",
"thiserror 1.0.65",
]

Expand Down Expand Up @@ -5980,6 +5981,7 @@ name = "re_format_arrow"
version = "0.22.0-alpha.1+dev"
dependencies = [
"comfy-table",
"itertools 0.13.0",
"re_arrow2",
"re_tuid",
"re_types_core",
Expand Down Expand Up @@ -8483,6 +8485,12 @@ dependencies = [
"libc",
]

[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"

[[package]]
name = "target-lexicon"
version = "0.12.16"
Expand Down Expand Up @@ -9848,7 +9856,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]

[[package]]
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ strum_macros = "0.26"
sublime_fuzzy = "0.7"
syn = "2.0"
sysinfo = { version = "0.30.1", default-features = false }
tap = "1.0.1"
tempfile = "3.0"
thiserror = "1.0"
time = { version = "0.3.36", default-features = false, features = [
Expand Down
1 change: 1 addition & 0 deletions crates/store/re_chunk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ itertools.workspace = true
nohash-hasher.workspace = true
rand = { workspace = true, features = ["std_rng"] }
similar-asserts.workspace = true
tap.workspace = true
thiserror.workspace = true

# Optional dependencies:
Expand Down
4 changes: 2 additions & 2 deletions crates/store/re_chunk/src/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,11 +1355,11 @@ impl Chunk {
impl std::fmt::Display for Chunk {
#[inline]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let chunk = self.to_transport().map_err(|err| {
let transport_chunk = self.to_transport().map_err(|err| {
re_log::error_once!("couldn't display Chunk: {err}");
std::fmt::Error
})?;
chunk.fmt(f)
transport_chunk.fmt(f)
}
}

Expand Down
11 changes: 10 additions & 1 deletion crates/store/re_chunk/src/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use nohash_hasher::IntMap;
use re_byte_size::SizeBytes as _;
use re_log_types::{EntityPath, Timeline};
use re_types_core::{Component as _, ComponentDescriptor, Loggable as _};
use tap::Tap as _;

use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, RowId, TimeColumn};

Expand Down Expand Up @@ -434,7 +435,15 @@ impl Chunk {
row_ids.data_type().clone(),
false,
)
.with_metadata(TransportChunk::field_metadata_control_column()),
.with_metadata(
TransportChunk::field_metadata_control_column().tap_mut(|metadata| {
// This ensures the RowId/Tuid is formatted correctly
metadata.insert(
"ARROW:extension:name".to_owned(),
re_tuid::Tuid::ARROW_EXTENSION_NAME.to_owned(),
);
}),
),
);
columns.push(row_ids.clone().boxed());
}
Expand Down
48 changes: 48 additions & 0 deletions crates/store/re_chunk_store/tests/formatting.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use std::sync::Arc;

use re_chunk::{Chunk, ChunkId, RowId};
use re_chunk_store::ChunkStore;
use re_log_types::{
build_frame_nr, build_log_time,
example_components::{MyColor, MyIndex},
EntityPath, Time,
};
use re_types_core::ComponentBatch as _;

/// Ensure that `ChunkStore::to_string()` is nice and readable.
#[test]
fn format_chunk_store() -> anyhow::Result<()> {
re_log::setup_logging();

let mut store = ChunkStore::new(
re_log_types::StoreId::from_string(
re_log_types::StoreKind::Recording,
"test_id".to_owned(),
),
Default::default(),
);

let entity_path = EntityPath::from("this/that");

let (indices1, colors1) = (MyIndex::from_iter(0..3), MyColor::from_iter(0..3));

let chunk_id = ChunkId::from_u128(123_456_789_123_456_789_123_456_789);
let row_id = RowId::from_u128(32_033_410_000_000_000_000_000_000_123);

store.insert_chunk(&Arc::new(
Chunk::builder_with_id(chunk_id, entity_path.clone())
.with_serialized_batches(
row_id,
[
build_frame_nr(1),
build_log_time(Time::from_ns_since_epoch(1_736_534_622_123_456_789)),
],
[indices1.try_serialized()?, colors1.try_serialized()?],
)
.build()?,
))?;

insta::assert_snapshot!("format_chunk_store", store.to_string());

Ok(())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
source: crates/store/re_chunk_store/tests/formatting.rs
expression: store.to_string()
---
ChunkStore {
id: test_id
config: ChunkStoreConfig { enable_changelog: true, chunk_max_bytes: 393216, chunk_max_rows: 4096, chunk_max_rows_if_unsorted: 1024 }
stats: {
num_chunks: 1
total_size_bytes: 1.0 KiB
num_rows: 1
num_events: 2
}
chunks: [
┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CHUNK METADATA: │
│ * entity_path: "/this/that" │
│ * heap_size_bytes: "838" │
│ * id: "661EFDF2E3B19F7C045F15" │
│ * is_sorted: "" │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ┌──────────────────────────────────┬───────────────┬───────────────────────────────┬───────────────────┬───────────────────┐ │
│ │ RowId ┆ frame_nr ┆ log_time ┆ example.MyColor ┆ example.MyIndex │ │
│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │
│ │ type: "struct[2]" ┆ type: "i64" ┆ type: "timestamp(ns)" ┆ type: "list[u32]" ┆ type: "list[u64]" │ │
│ │ ARROW:extension:name: "TUID" ┆ is_sorted: "" ┆ is_sorted: "" ┆ kind: "data" ┆ kind: "data" │ │
│ │ kind: "control" ┆ kind: "time" ┆ kind: "time" ┆ ┆ │ │
│ ╞══════════════════════════════════╪═══════════════╪═══════════════════════════════╪═══════════════════╪═══════════════════╡ │
│ │ 0000000067816A6BB4B8C1254D40007B ┆ 1 ┆ 2025-01-10 18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
│ └──────────────────────────────────┴───────────────┴───────────────────────────────┴───────────────────┴───────────────────┘ │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
]
}
1 change: 1 addition & 0 deletions crates/store/re_format_arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ all-features = true

[dependencies]
arrow2.workspace = true
itertools.workspace = true
re_tuid.workspace = true
re_types_core.workspace = true # tuid serialization

Expand Down
41 changes: 9 additions & 32 deletions crates/store/re_format_arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,13 @@ use re_types_core::Loggable as _;
type CustomFormatter<'a, F> = Box<dyn Fn(&mut F, usize) -> std::fmt::Result + 'a>;

fn get_custom_display<'a, F: std::fmt::Write + 'a>(
field: &Field,
array: &'a dyn Array,
null: &'static str,
) -> CustomFormatter<'a, F> {
// NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're
// interested in the type of the array that's underneath.
let datatype = (|| match array.data_type().to_logical_type() {
DataType::List(_) => array
.as_any()
.downcast_ref::<ListArray<i32>>()?
.iter()
.next()?
.map(|array| array.data_type().clone()),
_ => Some(array.data_type().clone()),
})();

if let Some(DataType::Extension(name, _, _)) = datatype {
if let Some(extension_name) = field.metadata.get("ARROW:extension:name") {
// TODO(#1775): This should be registered dynamically.
if name.as_str() == Tuid::NAME {
if extension_name.as_str() == Tuid::ARROW_EXTENSION_NAME {
return Box::new(|w, index| {
if let Some(tuid) = parse_tuid(array, index) {
w.write_fmt(format_args!("{tuid}"))
Expand Down Expand Up @@ -244,21 +233,10 @@ where

outer_table.add_row({
let mut row = Row::new();
row.add_cell({
let cell = Cell::new(format!(
"CHUNK METADATA:\n{}",
DisplayMetadata(metadata, "* ")
));

#[cfg(not(target_arch = "wasm32"))] // requires TTY
{
cell.add_attribute(comfy_table::Attribute::Italic)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to remove the italics, otherwise the formatting was different locally (tty) and on CI

}
#[cfg(target_arch = "wasm32")]
{
cell
}
});
row.add_cell(Cell::new(format!(
"CHUNK METADATA:\n{}",
DisplayMetadata(metadata, "* ")
)));
row
});

Expand All @@ -280,9 +258,8 @@ where
});
table.set_header(header);

let displays = columns
.iter()
.map(|array| get_custom_display(&**array, "-"))
let displays = itertools::izip!(fields.iter(), columns.iter())
.map(|(field, array)| get_custom_display(field, &**array, "-"))
.collect::<Vec<_>>();
let num_rows = columns.first().map_or(0, |list_array| list_array.len());

Expand Down
14 changes: 11 additions & 3 deletions crates/store/re_log_encoding/src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ mod tests {
]
}

// TODO(#3741): should not be needed once the migration from arrow2 is complete
fn clear_arrow_extension_metadata(messages: &mut Vec<LogMsg>) {
for msg in messages {
if let LogMsg::ArrowMsg(_, arrow_msg) = msg {
Expand All @@ -472,7 +473,7 @@ mod tests {
fn test_encode_decode() {
let rrd_version = CrateVersion::LOCAL;

let messages = fake_log_messages();
let mut messages = fake_log_messages();

let options = [
EncodingOptions {
Expand Down Expand Up @@ -503,9 +504,14 @@ mod tests {
.collect::<Result<Vec<LogMsg>, DecodeError>>()
.unwrap();

// TODO(#3741): should not be needed once the migration from arrow2 is complete
clear_arrow_extension_metadata(&mut messages);
clear_arrow_extension_metadata(&mut decoded_messages);

assert_eq!(messages, decoded_messages);
assert!(
messages == decoded_messages,
"Got: {decoded_messages:#?}, expected: {messages:#?}"
);
}
}

Expand Down Expand Up @@ -536,7 +542,7 @@ mod tests {
let mut data = vec![];

// write "2 files" i.e. 2 streams that end with end-of-stream marker
let messages = fake_log_messages();
let mut messages = fake_log_messages();

// (2 encoders as each encoder writes a file header)
let writer = std::io::Cursor::new(&mut data);
Expand Down Expand Up @@ -565,6 +571,8 @@ mod tests {

let mut decoded_messages = decoder.into_iter().collect::<Result<Vec<_>, _>>().unwrap();

// TODO(#3741): should not be needed once the migration from arrow2 is complete
clear_arrow_extension_metadata(&mut messages);
clear_arrow_extension_metadata(&mut decoded_messages);

assert_eq!([messages.clone(), messages].concat(), decoded_messages);
Expand Down
2 changes: 1 addition & 1 deletion crates/store/re_types_core/src/tuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl Loggable for Tuid {
Self: 'a,
{
Err(crate::SerializationError::not_implemented(
Self::NAME,
Self::ARROW_EXTENSION_NAME,
"TUIDs are never nullable, use `to_arrow()` instead",
))
}
Expand Down
2 changes: 1 addition & 1 deletion crates/utils/re_tuid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl Tuid {
/// We give an actual name to [`Tuid`], and inject that name into the Arrow datatype extensions,
/// as a hack so that we can compactly format them when printing Arrow data to the terminal.
/// Check out `re_format_arrow` for context.
pub const NAME: &'static str = "rerun.datatypes.TUID";
pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID";
}

impl std::fmt::Display for Tuid {
Expand Down
Loading