Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix regression list Type Coercion List with inner type struct which has large/view types #14385

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -507,18 +507,19 @@ fn type_union_resolution_coercion(
None
}

let types = lhs
let coerced_types = lhs
.iter()
.map(|lhs_field| search_corresponding_coerced_type(lhs_field, rhs))
.collect::<Option<Vec<_>>>()?;

let fields = types
// preserve the field name and nullability
let orig_fields = std::iter::zip(lhs.iter(), rhs.iter());

let fields: Vec<FieldRef> = coerced_types
.into_iter()
.enumerate()
.map(|(i, datatype)| {
Arc::new(Field::new(format!("c{i}"), datatype, true))
})
.collect::<Vec<FieldRef>>();
.zip(orig_fields)
.map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, rhs))
.collect();
Some(DataType::Struct(fields.into()))
}
_ => {
Expand Down
53 changes: 53 additions & 0 deletions datafusion/sqllogictest/test_files/case.slt
Original file line number Diff line number Diff line change
Expand Up @@ -416,5 +416,58 @@ SELECT
end
FROM t;

statement ok
drop table t

# Fix coercion of lists of structs
# https://github.com/apache/datafusion/issues/14154

statement ok
create or replace table t as values
(
100, -- column1 int (so the case isn't constant folded)
[{ 'foo': arrow_cast('baz', 'Utf8View') }], -- column2 has List of Struct w/ Utf8View
[{ 'foo': 'bar' }], -- column3 has List of Struct w/ Utf8
[{ 'foo': 'blarg' }] -- column4 has List of Struct w/ Utf8
);

# This case forces all branches to be coerced to the same type
query ?
SELECT
case
when column1 > 0 then column2
when column1 < 0 then column3
else column4
end
FROM t;
----
[{foo: baz}]

# different orders of the branches
query ?
SELECT
case
when column1 > 0 then column3 -- NB different order
when column1 < 0 then column4
else column2
end
FROM t;
----
[{foo: bar}]

# different orders of the branches
query ?
SELECT
case
when column1 > 0 then column4 -- NB different order
when column1 < 0 then column2
else column3
end
FROM t;
----
[{foo: blarg}]



statement ok
drop table t
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/struct.slt
Original file line number Diff line number Diff line change
Expand Up @@ -459,14 +459,14 @@ create table t as values({r: 'a', c: 1}), ({r: 'b', c: 2.3});
query ?
select * from t;
----
{c0: a, c1: 1.0}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the column names change is the part of the fix?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes -- these are the field names of the struct type

The values that were written into the table are liek this (a few rows above). Note the field names are r and c

create table t as values({r: 'a', c: 1}), ({r: 'b', c: 2.3});

Because they got coerced (1 needed to get coercered to the same type as 2.3) the field names got reassigned to c0 and c1 🤯 )

The fix in this PR preserves the names from the input which seems much more correct to me

{c0: b, c1: 2.3}
{r: a, c: 1.0}
{r: b, c: 2.3}

query T
select arrow_typeof(column1) from t;
----
Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])
Struct([Field { name: "c0", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])
Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])
Struct([Field { name: "r", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }])

statement ok
drop table t;
Expand Down