From 7cb2adb5e6d2ebdff5f1685d419de638e7c74f23 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 19 Jan 2024 08:54:01 +0100 Subject: [PATCH 1/4] get rid of 'polars' feature and dataframe_util --- Cargo.lock | 1 - crates/re_query/Cargo.toml | 17 +- crates/re_query/src/dataframe_util.rs | 216 -------------------------- crates/re_query/src/lib.rs | 7 - 4 files changed, 1 insertion(+), 240 deletions(-) delete mode 100644 crates/re_query/src/dataframe_util.rs diff --git a/Cargo.lock b/Cargo.lock index 336780852773..764f15808700 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4800,7 +4800,6 @@ dependencies = [ "document-features", "itertools 0.12.0", "mimalloc", - "polars-core", "rand", "re_data_store", "re_entity_db", diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml index e6125aeeeb46..5b40c5b34bc3 100644 --- a/crates/re_query/Cargo.toml +++ b/crates/re_query/Cargo.toml @@ -19,9 +19,7 @@ all-features = true [features] default = [] -## Polars support -polars = ["dep:polars-core", "re_data_store/polars"] - +# TODO: let's get rid of that too while we're here testing = ["re_log_types/testing"] [dependencies] @@ -42,13 +40,6 @@ itertools = { workspace = true } smallvec.workspace = true thiserror.workspace = true -# Optional dependencies: -polars-core = { workspace = true, optional = true, features = [ - "dtype-date", - "dtype-time", - "dtype-struct", -] } - [dev-dependencies] re_types = { workspace = true, features = ["datagen"] } @@ -56,12 +47,6 @@ re_types = { workspace = true, features = ["datagen"] } criterion.workspace = true itertools = { workspace = true } mimalloc.workspace = true -polars-core = { workspace = true, features = [ - "dtype-date", - "dtype-time", - "dtype-struct", - "fmt", -] } rand = { workspace = true, features = ["std", "std_rng"] } [lib] diff --git a/crates/re_query/src/dataframe_util.rs b/crates/re_query/src/dataframe_util.rs deleted file mode 100644 index 2567046f072c..000000000000 --- a/crates/re_query/src/dataframe_util.rs +++ /dev/null @@ -1,216 +0,0 @@ -use arrow2::{ - array::{Array, StructArray}, - datatypes::PhysicalType, -}; -use polars_core::prelude::*; -use re_data_store::ArrayExt; -use re_types_core::{components::InstanceKey, Archetype, Component, Loggable}; - -use crate::{ArchetypeView, ComponentWithInstances, QueryError}; - -/// Make it so that our arrays can be deserialized again by `polars`. -fn fix_polars_nulls(array: &dyn Array) -> Box { - // TODO(jleibs): This is an ugly work-around but gets our serializers - // working again - // - // Explanation: Polars Series appear make all fields nullable. Polars - // doesn't even have a way to express non-nullable types. However, our - // `field_types` `Component` definitions have non-nullable fields. This - // causes a "Data type mismatch" on the deserialization and keeps us from - // getting at our data. - // - // This definitely impacts Struct types, but doesn't seem to cause an issue - // for primitives - match array.data_type().to_physical_type() { - PhysicalType::Struct => { - let phys_arrow = array.as_any().downcast_ref::().unwrap(); - // Polars doesn't store validity for the top-level of a Struct, only - // its fields. so use that validity of the first field as the - // validity for the whole structure. - - // TODO(jleibs): This might have issues with complex structs that - // include optional fields. - let validity = phys_arrow.values()[0].validity(); - let fixed_arrow = StructArray::new( - C::arrow_datatype(), - phys_arrow.clone().into_data().1, - validity.cloned(), - ); - Box::new(fixed_arrow) - } - _ => array.to_boxed(), - } -} - -/// Iterator for a single column in a dataframe as the rust-native Component type -pub fn iter_column<'a, C: Component + 'a>( - df: &'a DataFrame, -) -> impl Iterator> + 'a { - let res = match df.column(C::name().as_ref()) { - Ok(col) => itertools::Either::Left(col.chunks().iter().flat_map(|array| { - let fixed_array = fix_polars_nulls::(array.as_ref()); - C::from_arrow_opt(fixed_array.as_ref()).unwrap() - })), - Err(_) => itertools::Either::Right(std::iter::repeat_with(|| None).take(df.height())), - }; - res.into_iter() -} - -pub fn df_builder1<'a, C0>(c0: &'a [Option]) -> crate::Result -where - C0: re_types_core::Component + Clone + 'a, - &'a C0: Into<::std::borrow::Cow<'a, C0>>, -{ - let array0 = C0::to_arrow_opt(c0.iter().map(|c| c.as_ref()))?; - - let series0 = Series::try_from((C0::name().as_ref(), array0.as_ref().clean_for_polars()))?; - - Ok(DataFrame::new(vec![series0])?) -} - -pub fn df_builder2<'a, C0, C1>( - c0: &'a [Option], - c1: &'a [Option], -) -> crate::Result -where - C0: re_types_core::Component + Clone + 'a, - C1: re_types_core::Component + Clone + 'a, - &'a C0: Into<::std::borrow::Cow<'a, C0>>, - &'a C1: Into<::std::borrow::Cow<'a, C1>>, -{ - let array0 = C0::to_arrow_opt(c0.iter().map(|c| c.as_ref()))?; - let array1 = C1::to_arrow_opt(c1.iter().map(|c| c.as_ref()))?; - - let series0 = Series::try_from((C0::name().as_ref(), array0.as_ref().clean_for_polars()))?; - let series1 = Series::try_from((C1::name().as_ref(), array1.as_ref().clean_for_polars()))?; - - Ok(DataFrame::new(vec![series0, series1])?) -} - -pub fn df_builder3<'a, C0, C1, C2>( - c0: &'a [Option], - c1: &'a [Option], - c2: &'a [Option], -) -> crate::Result -where - C0: re_types_core::Component + Clone + 'a, - C1: re_types_core::Component + Clone + 'a, - C2: re_types_core::Component + Clone + 'a, - &'a C0: Into<::std::borrow::Cow<'a, C0>>, - &'a C1: Into<::std::borrow::Cow<'a, C1>>, - &'a C2: Into<::std::borrow::Cow<'a, C2>>, -{ - let array0 = C0::to_arrow_opt(c0.iter().map(|c| c.as_ref()))?; - let array1 = C1::to_arrow_opt(c1.iter().map(|c| c.as_ref()))?; - let array2 = C2::to_arrow_opt(c2.iter().map(|c| c.as_ref()))?; - - let series0 = Series::try_from((C0::name().as_ref(), array0.as_ref().clean_for_polars()))?; - let series1 = Series::try_from((C1::name().as_ref(), array1.as_ref().clean_for_polars()))?; - let series2 = Series::try_from((C2::name().as_ref(), array2.as_ref().clean_for_polars()))?; - - Ok(DataFrame::new(vec![series0, series1, series2])?) -} - -impl ComponentWithInstances { - pub fn as_df<'a, C0: Component + 'a>(&'a self) -> crate::Result { - if C0::name() != self.name() { - return Err(QueryError::TypeMismatch { - actual: self.name(), - requested: C0::name(), - }); - } - - let array0 = self.instance_keys.as_arrow_ref(); - let array1 = self.values.as_arrow_ref(); - - let series0 = Series::try_from(( - InstanceKey::name().as_ref(), - array0.as_ref().clean_for_polars(), - ))?; - let series1 = Series::try_from((C0::name().as_ref(), array1.as_ref().clean_for_polars()))?; - - Ok(DataFrame::new(vec![series0, series1])?) - } -} - -impl ArchetypeView { - pub fn as_df1< - 'a, - C1: re_types_core::Component + Clone + Into<::std::borrow::Cow<'a, C1>> + 'a, - >( - &self, - ) -> crate::Result { - let array0 = InstanceKey::to_arrow(self.iter_instance_keys())?; - let array1 = C1::to_arrow_opt(self.iter_optional_component::()?)?; - - let series0 = Series::try_from(( - InstanceKey::name().as_ref(), - array0.as_ref().clean_for_polars(), - ))?; - let series1 = Series::try_from((C1::name().as_ref(), array1.as_ref().clean_for_polars()))?; - - Ok(DataFrame::new(vec![series0, series1])?) - } - - pub fn as_df2< - 'a, - C1: re_types_core::Component + Clone + Into<::std::borrow::Cow<'a, C1>> + 'a, - C2: re_types_core::Component + Clone + Into<::std::borrow::Cow<'a, C2>> + 'a, - >( - &self, - ) -> crate::Result { - let array0 = InstanceKey::to_arrow(self.iter_instance_keys())?; - let array1 = C1::to_arrow_opt(self.iter_optional_component::()?)?; - let array2 = C2::to_arrow_opt(self.iter_optional_component::()?)?; - - let series0 = Series::try_from(( - InstanceKey::name().as_ref(), - array0.as_ref().clean_for_polars(), - ))?; - let series1 = Series::try_from((C1::name().as_ref(), array1.as_ref().clean_for_polars()))?; - let series2 = Series::try_from((C2::name().as_ref(), array2.as_ref().clean_for_polars()))?; - - Ok(DataFrame::new(vec![series0, series1, series2])?) - } -} - -#[test] -fn test_df_builder() { - use re_types::components::{Color, Radius}; - - let radii = vec![ - Some(Radius(1.0)), - Some(Radius(3.0)), - Some(Radius(5.0)), - Some(Radius(7.0)), - ]; - - let colors = vec![ - None, - Some(Color::from(0xff000000)), - Some(Color::from(0x00ff0000)), - None, - ]; - - let df = df_builder2(&radii, &colors).unwrap(); - eprintln!("{df:?}"); - // - // ┌──────────────┬─────────────────┐ - // │ Radius ┆ Color │ - // │ --- ┆ --- │ - // │ f32 ┆ u32 │ - // ╞══════════════╪═════════════════╡ - // │ 1.0 ┆ null │ - // │ 3.0 ┆ 4278190080 │ - // │ 5.0 ┆ 16711680 │ - // │ 7.0 ┆ null │ - // └──────────────┴─────────────────┘ - - let expected = df![ - Radius::name().as_ref() => &[1f32, 3f32, 5f32, 7f32], - Color::name().as_ref() => &[None, Some(0xff000000_u32), Some(0x00ff0000_u32), None], - ] - .unwrap(); - - assert_eq!(df, expected); -} diff --git a/crates/re_query/src/lib.rs b/crates/re_query/src/lib.rs index ccd1c0f6d2eb..3fbab0796251 100644 --- a/crates/re_query/src/lib.rs +++ b/crates/re_query/src/lib.rs @@ -11,9 +11,6 @@ mod query; mod range; mod util; -#[cfg(feature = "polars")] -pub mod dataframe_util; - pub use self::archetype_view::{ArchetypeView, ComponentWithInstances}; pub use self::query::{get_component_with_instances, query_archetype}; pub use self::range::range_archetype; @@ -63,10 +60,6 @@ pub enum QueryError { #[error("Error converting arrow data: {0}")] ArrowError(#[from] arrow2::error::Error), - #[cfg(feature = "polars")] - #[error("Error from within Polars")] - PolarsError(#[from] polars_core::prelude::PolarsError), - #[error("Not implemented")] NotImplemented, } From aa7584f6ca0355d9dfc8433fe785a523dae77da1 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 19 Jan 2024 09:08:56 +0100 Subject: [PATCH 2/4] make componentwithinstances Display and clean accordingly --- crates/re_query/src/archetype_view.rs | 14 ++++++++++++++ crates/re_query/src/lib.rs | 6 ------ crates/re_query/src/query.rs | 10 ++-------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/crates/re_query/src/archetype_view.rs b/crates/re_query/src/archetype_view.rs index 222d4b939a14..7bd627243e7b 100644 --- a/crates/re_query/src/archetype_view.rs +++ b/crates/re_query/src/archetype_view.rs @@ -19,6 +19,20 @@ pub struct ComponentWithInstances { pub(crate) values: DataCell, } +impl std::fmt::Display for ComponentWithInstances { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let table = arrow::format_table( + [ + self.instance_keys.as_arrow_ref(), + self.values.as_arrow_ref(), + ], + ["InstanceKey", self.values.component_name().as_ref()], + ); + + f.write_fmt(format_args!("ComponentWithInstances:\n{table}")) + } +} + impl ComponentWithInstances { /// Name of the [`Component`] #[inline] diff --git a/crates/re_query/src/lib.rs b/crates/re_query/src/lib.rs index 3fbab0796251..923234b8ebb7 100644 --- a/crates/re_query/src/lib.rs +++ b/crates/re_query/src/lib.rs @@ -1,10 +1,4 @@ //! Provide query-centric access to the [`re_data_store`]. -//! -//! ## Feature flags -#![doc = document_features::document_features!()] -//! - -// TODO(jleibs) better crate documentation. mod archetype_view; mod query; diff --git a/crates/re_query/src/query.rs b/crates/re_query/src/query.rs index 8029057916b6..8745aebd0121 100644 --- a/crates/re_query/src/query.rs +++ b/crates/re_query/src/query.rs @@ -28,10 +28,7 @@ use crate::{ArchetypeView, ComponentWithInstances, QueryError}; /// ) /// .unwrap(); /// -/// # #[cfg(feature = "polars")] -/// let df = component.as_df::().unwrap(); -/// -/// //println!("{df:?}"); +/// //println!("{component}"); /// ``` /// /// Outputs: @@ -99,10 +96,7 @@ pub fn get_component_with_instances( /// ) /// .unwrap(); /// -/// # #[cfg(feature = "polars")] -/// let df = arch_view.as_df2::().unwrap(); -/// -/// //println!("{df:?}"); +/// //println!("{arch_view:?}"); /// ``` /// /// Outputs: From 58f8f08dbe93de35a7043c3e5a02bce491c3b88d Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 19 Jan 2024 09:12:05 +0100 Subject: [PATCH 3/4] get rid of superfluous testing flag --- crates/re_query/Cargo.toml | 4 +--- crates/re_query/src/lib.rs | 2 +- crates/re_query/src/query.rs | 18 +----------------- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml index 5b40c5b34bc3..c62f38470f09 100644 --- a/crates/re_query/Cargo.toml +++ b/crates/re_query/Cargo.toml @@ -19,9 +19,6 @@ all-features = true [features] default = [] -# TODO: let's get rid of that too while we're here -testing = ["re_log_types/testing"] - [dependencies] # Rerun dependencies: re_data_store.workspace = true @@ -42,6 +39,7 @@ thiserror.workspace = true [dev-dependencies] +re_log_types = { workspace = true, features = ["testing"] } re_types = { workspace = true, features = ["datagen"] } criterion.workspace = true diff --git a/crates/re_query/src/lib.rs b/crates/re_query/src/lib.rs index 923234b8ebb7..91345299df53 100644 --- a/crates/re_query/src/lib.rs +++ b/crates/re_query/src/lib.rs @@ -11,7 +11,7 @@ pub use self::range::range_archetype; pub use self::util::query_archetype_with_history; // Used for doc-tests -#[cfg(feature = "testing")] +#[doc(hidden)] pub use self::query::__populate_example_store; #[derive(Debug, Clone, Copy)] diff --git a/crates/re_query/src/query.rs b/crates/re_query/src/query.rs index 8745aebd0121..7a854f782dda 100644 --- a/crates/re_query/src/query.rs +++ b/crates/re_query/src/query.rs @@ -8,9 +8,6 @@ use crate::{ArchetypeView, ComponentWithInstances, QueryError}; /// /// Returns `None` if the component is not found. /// -#[cfg_attr( - feature = "testing", - doc = r##" /// ``` /// # use re_data_store::LatestAtQuery; /// # use re_log_types::{Timeline, example_components::{MyColor, MyPoint}}; @@ -20,7 +17,7 @@ use crate::{ArchetypeView, ComponentWithInstances, QueryError}; /// let ent_path = "point"; /// let query = LatestAtQuery::new(Timeline::new_sequence("frame_nr"), 123.into()); /// -/// let (_, component) = re_query::get_component_with_instances( +/// let (_, _, component) = re_query::get_component_with_instances( /// &store, /// &query, /// &ent_path.into(), @@ -43,8 +40,6 @@ use crate::{ArchetypeView, ComponentWithInstances, QueryError}; /// │ 96 ┆ {3.0,4.0} │ /// └─────────────┴───────────┘ /// ``` -"## -)] pub fn get_component_with_instances( store: &DataStore, query: &LatestAtQuery, @@ -76,10 +71,6 @@ pub fn get_component_with_instances( /// If you expect only one instance (e.g. for mono-components like `Transform` `Tensor`] /// and have no additional components you can use [`DataStore::query_latest_component`] instead. /// -/// -#[cfg_attr( - feature = "testing", - doc = r##" /// ``` /// # use re_data_store::LatestAtQuery; /// # use re_log_types::{Timeline, example_components::{MyColor, MyPoint, MyPoints}}; @@ -110,8 +101,6 @@ pub fn get_component_with_instances( /// │ 96 ┆ {3.0,4.0} ┆ 4278190080 │ /// └────────────────────┴───────────────┴─────────────────┘ /// ``` -"## -)] pub fn query_archetype( store: &DataStore, query: &LatestAtQuery, @@ -170,7 +159,6 @@ pub fn query_archetype( } /// Helper used to create an example store we can use for querying in doctests -#[cfg(feature = "testing")] pub fn __populate_example_store() -> DataStore { use re_log_types::example_components::{MyColor, MyPoint}; use re_log_types::{build_frame_nr, DataRow}; @@ -215,8 +203,6 @@ pub fn __populate_example_store() -> DataStore { // Minimal test matching the doctest for `get_component_with_instances` #[test] -#[cfg(test)] -#[cfg(feature = "testing")] fn simple_get_component() { use smallvec::smallvec; @@ -250,8 +236,6 @@ fn simple_get_component() { // Minimal test matching the doctest for `query_entity_with_primary` #[test] -#[cfg(test)] -#[cfg(feature = "testing")] fn simple_query_archetype() { use re_data_store::LatestAtQuery; use re_log_types::example_components::{MyColor, MyPoint, MyPoints}; From 785aa600a900a31cefe2e9889093add67c0f7ba2 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 19 Jan 2024 09:39:34 +0100 Subject: [PATCH 4/4] get rid of re_log_types' extremely annoying 'testing' feature flag --- crates/re_data_store/Cargo.toml | 1 - crates/re_entity_db/Cargo.toml | 1 - crates/re_log_types/Cargo.toml | 2 -- crates/re_log_types/src/data_table.rs | 1 - crates/re_log_types/src/lib.rs | 1 - crates/re_query/Cargo.toml | 1 - crates/re_query_cache/Cargo.toml | 1 - crates/re_sdk/Cargo.toml | 1 - crates/re_space_view/Cargo.toml | 1 - 9 files changed, 10 deletions(-) diff --git a/crates/re_data_store/Cargo.toml b/crates/re_data_store/Cargo.toml index 7302a3861135..314fcb452282 100644 --- a/crates/re_data_store/Cargo.toml +++ b/crates/re_data_store/Cargo.toml @@ -66,7 +66,6 @@ polars-ops = { workspace = true, optional = true, features = [ [dev-dependencies] -re_log_types = { workspace = true, features = ["testing"] } re_types = { workspace = true, features = ["datagen", "testing"] } anyhow.workspace = true diff --git a/crates/re_entity_db/Cargo.toml b/crates/re_entity_db/Cargo.toml index 63630f0cfca8..eeac12b55eeb 100644 --- a/crates/re_entity_db/Cargo.toml +++ b/crates/re_entity_db/Cargo.toml @@ -49,7 +49,6 @@ web-time.workspace = true [dev-dependencies] re_log_encoding = { workspace = true, features = ["decoder", "encoder"] } -re_log_types = { workspace = true, features = ["testing"] } re_types = { workspace = true, features = ["datagen"] } anyhow.workspace = true diff --git a/crates/re_log_types/Cargo.toml b/crates/re_log_types/Cargo.toml index 89b3157be2d7..d4dcbf9ae9e9 100644 --- a/crates/re_log_types/Cargo.toml +++ b/crates/re_log_types/Cargo.toml @@ -28,8 +28,6 @@ serde = [ "re_types_core/serde", ] -testing = [] - [dependencies] # Rerun diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index 0fc2567758f6..dcb0f7c69e0f 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -1302,7 +1302,6 @@ impl DataTable { /// Crafts a simple but interesting [`DataTable`]. #[cfg(not(target_arch = "wasm32"))] -#[cfg(feature = "testing")] impl DataTable { pub fn example(timeless: bool) -> DataTable { use crate::{ diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 1eca325a255b..36526feac807 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -21,7 +21,6 @@ pub mod arrow_msg; mod data_cell; mod data_row; mod data_table; -#[cfg(feature = "testing")] pub mod example_components; pub mod hash; mod num_instances; diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml index c62f38470f09..deb3978bf07e 100644 --- a/crates/re_query/Cargo.toml +++ b/crates/re_query/Cargo.toml @@ -39,7 +39,6 @@ thiserror.workspace = true [dev-dependencies] -re_log_types = { workspace = true, features = ["testing"] } re_types = { workspace = true, features = ["datagen"] } criterion.workspace = true diff --git a/crates/re_query_cache/Cargo.toml b/crates/re_query_cache/Cargo.toml index 3d1db1faa8e3..a8460f550907 100644 --- a/crates/re_query_cache/Cargo.toml +++ b/crates/re_query_cache/Cargo.toml @@ -46,7 +46,6 @@ web-time.workspace = true [dev-dependencies] -re_log_types = { workspace = true, features = ["testing"] } re_types = { workspace = true, features = ["datagen"] } criterion.workspace = true diff --git a/crates/re_sdk/Cargo.toml b/crates/re_sdk/Cargo.toml index c3500f0b0ab3..91059a5387f4 100644 --- a/crates/re_sdk/Cargo.toml +++ b/crates/re_sdk/Cargo.toml @@ -70,7 +70,6 @@ webbrowser = { workspace = true, optional = true } [dev-dependencies] re_data_store.workspace = true -re_log_types = { workspace = true, features = ["testing"] } itertools.workspace = true ndarray-rand.workspace = true diff --git a/crates/re_space_view/Cargo.toml b/crates/re_space_view/Cargo.toml index 6884f1e0cbdc..0a30f8cbc1bb 100644 --- a/crates/re_space_view/Cargo.toml +++ b/crates/re_space_view/Cargo.toml @@ -17,7 +17,6 @@ all-features = true [features] default = [] -testing = ["re_log_types/testing"] [dependencies] re_entity_db.workspace = true