From bdeb4651a5ceb1db0612838a1bf34905adf5bf84 Mon Sep 17 00:00:00 2001 From: intoraw Date: Sat, 4 May 2024 21:52:41 +0800 Subject: [PATCH 1/2] chore: remove DataPtr trait since Arc::ptr_eq ignores pointer metadata --- datafusion/common/src/utils.rs | 28 ------------------- .../src/physical_optimizer/join_selection.rs | 5 ++-- .../physical-expr-common/src/physical_expr.rs | 3 +- datafusion/physical-plan/src/lib.rs | 3 +- 4 files changed, 4 insertions(+), 35 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index 102e4d73083e..1181c761abb9 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -525,34 +525,6 @@ pub fn list_ndims(data_type: &DataType) -> u64 { } } -/// An extension trait for smart pointers. Provides an interface to get a -/// raw pointer to the data (with metadata stripped away). -/// -/// This is useful to see if two smart pointers point to the same allocation. -pub trait DataPtr { - /// Returns a raw pointer to the data, stripping away all metadata. - fn data_ptr(this: &Self) -> *const (); - - /// Check if two pointers point to the same data. - fn data_ptr_eq(this: &Self, other: &Self) -> bool { - // Discard pointer metadata (including the v-table). - let this = Self::data_ptr(this); - let other = Self::data_ptr(other); - - std::ptr::eq(this, other) - } -} - -// Currently, it's brittle to compare `Arc`s of dyn traits with `Arc::ptr_eq` -// due to this check including v-table equality. It may be possible to use -// `Arc::ptr_eq` directly if a fix to https://github.com/rust-lang/rust/issues/103763 -// is stabilized. -impl DataPtr for Arc { - fn data_ptr(this: &Self) -> *const () { - Arc::as_ptr(this) as *const () - } -} - /// Adopted from strsim-rs for string similarity metrics pub mod datafusion_strsim { // Source: https://github.com/dguo/strsim-rs/blob/master/src/lib.rs diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index b670e8297147..042a0198bfb5 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -1544,7 +1544,6 @@ mod hash_join_tests { use arrow::datatypes::{DataType, Field}; use arrow::record_batch::RecordBatch; - use datafusion_common::utils::DataPtr; struct TestCase { case: String, @@ -1937,8 +1936,8 @@ mod hash_join_tests { .. }) = plan.as_any().downcast_ref::() { - let left_changed = Arc::data_ptr_eq(left, &right_exec); - let right_changed = Arc::data_ptr_eq(right, &left_exec); + let left_changed = Arc::ptr_eq(left, &right_exec); + let right_changed = Arc::ptr_eq(right, &left_exec); // If this is not equal, we have a bigger problem. assert_eq!(left_changed, right_changed); assert_eq!( diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index be6358e73c99..a0f8bdf10377 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -24,7 +24,6 @@ use arrow::array::BooleanArray; use arrow::compute::filter_record_batch; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::utils::DataPtr; use datafusion_common::{internal_err, not_impl_err, Result}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::ColumnarValue; @@ -188,7 +187,7 @@ pub fn with_new_children_if_necessary( || children .iter() .zip(old_children.iter()) - .any(|(c1, c2)| !Arc::data_ptr_eq(c1, c2)) + .any(|(c1, c2)| !Arc::ptr_eq(c1, c2)) { Ok(expr.with_new_children(children)?) } else { diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index cd2be33e86c1..8d8a3e71031e 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -30,7 +30,6 @@ use crate::sorts::sort_preserving_merge::SortPreservingMergeExec; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::config::ConfigOptions; -use datafusion_common::utils::DataPtr; use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_physical_expr::{ @@ -684,7 +683,7 @@ pub fn with_new_children_if_necessary( || children .iter() .zip(old_children.iter()) - .any(|(c1, c2)| !Arc::data_ptr_eq(c1, c2)) + .any(|(c1, c2)| !Arc::ptr_eq(c1, c2)) { plan.with_new_children(children) } else { From 096f7b93c86b69625386b802dccc9eb87ba59457 Mon Sep 17 00:00:00 2001 From: intoraw Date: Sun, 5 May 2024 21:49:29 +0800 Subject: [PATCH 2/2] fix: remove DataPtr unit tests --- datafusion/common/src/utils.rs | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index 1181c761abb9..33d7313aa0e0 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -946,26 +946,6 @@ mod tests { assert_eq!(longest_consecutive_prefix([1, 2, 3, 4]), 0); } - #[test] - fn arc_data_ptr_eq() { - let x = Arc::new(()); - let y = Arc::new(()); - let y_clone = Arc::clone(&y); - - assert!( - Arc::data_ptr_eq(&x, &x), - "same `Arc`s should point to same data" - ); - assert!( - !Arc::data_ptr_eq(&x, &y), - "different `Arc`s should point to different data" - ); - assert!( - Arc::data_ptr_eq(&y, &y_clone), - "cloned `Arc` should point to same data as the original" - ); - } - #[test] fn test_merge_and_order_indices() { assert_eq!(