-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add ScalarValue::try_as_str
to get str value from logical strings
#14167
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2849,6 +2849,50 @@ impl ScalarValue { | |
ScalarValue::from(value).cast_to(target_type) | ||
} | ||
|
||
/// Returns the Some(`&str`) representation of `ScalarValue` of logical string type | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💯 doc |
||
/// | ||
/// Returns `None` if this `ScalarValue` is not a logical string type or the | ||
/// `ScalarValue` represents the `NULL` value. | ||
/// | ||
/// Note you can use [`Option::flatten`] to check for non null logical | ||
/// strings. | ||
/// | ||
/// For example, [`ScalarValue::Utf8`], [`ScalarValue::LargeUtf8`], and | ||
/// [`ScalarValue::Dictionary`] with a logical string value and store | ||
/// strings and can be accessed as `&str` using this method. | ||
/// | ||
/// # Example: logical strings | ||
/// ``` | ||
/// # use datafusion_common::ScalarValue; | ||
/// /// non strings return None | ||
/// let scalar = ScalarValue::from(42); | ||
/// assert_eq!(scalar.try_as_str(), None); | ||
/// // Non null logical string returns Some(Some(&str)) | ||
/// let scalar = ScalarValue::from("hello"); | ||
/// assert_eq!(scalar.try_as_str(), Some(Some("hello"))); | ||
/// // Null logical string returns Some(None) | ||
/// let scalar = ScalarValue::Utf8(None); | ||
/// assert_eq!(scalar.try_as_str(), Some(None)); | ||
/// ``` | ||
/// | ||
/// # Example: use [`Option::flatten`] to check for non-null logical strings | ||
/// ``` | ||
/// # use datafusion_common::ScalarValue; | ||
/// // Non null logical string returns Some(Some(&str)) | ||
/// let scalar = ScalarValue::from("hello"); | ||
/// assert_eq!(scalar.try_as_str().flatten(), Some("hello")); | ||
/// ``` | ||
pub fn try_as_str(&self) -> Option<Option<&str>> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not (Also, most of the use cases in this PR are converting a returned None to an error). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for the review @wiedld
|
||
let v = match self { | ||
ScalarValue::Utf8(v) => v, | ||
ScalarValue::LargeUtf8(v) => v, | ||
ScalarValue::Utf8View(v) => v, | ||
ScalarValue::Dictionary(_, v) => return v.try_as_str(), | ||
_ => return None, | ||
}; | ||
Some(v.as_ref().map(|v| v.as_str())) | ||
} | ||
|
||
/// Try to cast this value to a ScalarValue of type `data_type` | ||
pub fn cast_to(&self, target_type: &DataType) -> Result<Self> { | ||
self.cast_to_with_options(target_type, &DEFAULT_CAST_OPTIONS) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,15 +108,14 @@ impl AggregateUDFImpl for StringAgg { | |
|
||
fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> { | ||
if let Some(lit) = acc_args.exprs[1].as_any().downcast_ref::<Literal>() { | ||
return match lit.value() { | ||
ScalarValue::Utf8(Some(delimiter)) | ||
| ScalarValue::LargeUtf8(Some(delimiter)) => { | ||
Ok(Box::new(StringAggAccumulator::new(delimiter.as_str()))) | ||
return match lit.value().try_as_str() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a pretty good example of can reducing the repetition in the code to check for string literal values. This also now implicitly will work for Dictionary values where it would not have before |
||
Some(Some(delimiter)) => { | ||
Ok(Box::new(StringAggAccumulator::new(delimiter))) | ||
} | ||
Some(None) => Ok(Box::new(StringAggAccumulator::new(""))), | ||
None => { | ||
not_impl_err!("StringAgg not supported for delimiter {}", lit.value()) | ||
} | ||
ScalarValue::Utf8(None) | ||
| ScalarValue::LargeUtf8(None) | ||
| ScalarValue::Null => Ok(Box::new(StringAggAccumulator::new(""))), | ||
e => not_impl_err!("StringAgg not supported for delimiter {}", e), | ||
}; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,11 +121,9 @@ pub fn digest(args: &[ColumnarValue]) -> Result<ColumnarValue> { | |
); | ||
} | ||
let digest_algorithm = match &args[1] { | ||
ColumnarValue::Scalar(scalar) => match scalar { | ||
ScalarValue::Utf8View(Some(method)) | ||
| ScalarValue::Utf8(Some(method)) | ||
| ScalarValue::LargeUtf8(Some(method)) => method.parse::<DigestAlgorithm>(), | ||
other => exec_err!("Unsupported data type {other:?} for function digest"), | ||
ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could also avoid a bunch more duplication of stuff like this:
If we added a similar convenience method Similarly we could do the same with |
||
Some(Some(method)) => method.parse::<DigestAlgorithm>(), | ||
_ => exec_err!("Unsupported data type {scalar:?} for function digest"), | ||
}, | ||
ColumnarValue::Array(_) => { | ||
internal_err!("Digest using dynamically decided method is not yet supported") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -211,14 +211,12 @@ where | |
))), | ||
other => exec_err!("Unsupported data type {other:?} for function {name}"), | ||
}, | ||
ColumnarValue::Scalar(scalar) => match scalar { | ||
ScalarValue::Utf8View(a) | ||
| ScalarValue::LargeUtf8(a) | ||
| ScalarValue::Utf8(a) => { | ||
ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { | ||
Some(a) => { | ||
let result = a.as_ref().map(|x| op(x)).transpose()?; | ||
Ok(ColumnarValue::Scalar(S::scalar(result))) | ||
} | ||
other => exec_err!("Unsupported data type {other:?} for function {name}"), | ||
_ => exec_err!("Unsupported data type {scalar:?} for function {name}"), | ||
}, | ||
} | ||
} | ||
|
@@ -270,10 +268,8 @@ where | |
} | ||
}, | ||
// if the first argument is a scalar utf8 all arguments are expected to be scalar utf8 | ||
ColumnarValue::Scalar(scalar) => match scalar { | ||
ScalarValue::Utf8View(a) | ||
| ScalarValue::LargeUtf8(a) | ||
| ScalarValue::Utf8(a) => { | ||
ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { | ||
Some(a) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is clearer now |
||
let a = a.as_ref(); | ||
// ASK: Why do we trust `a` to be non-null at this point? | ||
let a = unwrap_or_internal_err!(a); | ||
|
@@ -291,7 +287,7 @@ where | |
}; | ||
|
||
if let Some(s) = x { | ||
match op(a.as_str(), s.as_str()) { | ||
match op(a, s.as_str()) { | ||
Ok(r) => { | ||
ret = Some(Ok(ColumnarValue::Scalar(S::scalar(Some( | ||
op2(r), | ||
|
@@ -408,19 +404,10 @@ where | |
DataType::Utf8 => Ok(a.as_string::<i32>().value(pos)), | ||
other => exec_err!("Unexpected type encountered '{other}'"), | ||
}, | ||
ColumnarValue::Scalar(s) => match s { | ||
ScalarValue::Utf8View(a) | ||
| ScalarValue::LargeUtf8(a) | ||
| ScalarValue::Utf8(a) => { | ||
if let Some(v) = a { | ||
Ok(v.as_str()) | ||
} else { | ||
continue; | ||
} | ||
} | ||
other => { | ||
exec_err!("Unexpected scalar type encountered '{other}'") | ||
} | ||
ColumnarValue::Scalar(s) => match s.try_as_str() { | ||
Some(Some(v)) => Ok(v), | ||
Some(None) => continue, // null string | ||
None => exec_err!("Unexpected scalar type encountered '{s}'"), | ||
}, | ||
}?; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is the new function