Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade linkedin-calcite-core to 1.21.0.153 and add explicit table alias to solve wrong alias issue #368

Merged
merged 2 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.core.Uncollect;
import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rel.logical.LogicalTableFunctionScan;
import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
import org.apache.calcite.rel.type.RelDataType;
Expand Down Expand Up @@ -89,6 +90,15 @@ public boolean requireCastOnString() {
*/
return true;
}

/**
* Override this method so that there will be explicit and correct table alias for selected fields, which is necessary for
* data type derivation on SqlNodes
*/
@Override
public boolean hasImplicitTableAlias() {
return false;
}
};
}

Expand Down Expand Up @@ -150,12 +160,12 @@ public Result visit(TableScan e) {
*/
@Override
public Result visit(Correlate e) {
final Result leftResult = visitChild(0, e.getLeft());
final Result leftResult = visitChild(0, e.getLeft()).resetAlias();
ljfgem marked this conversation as resolved.
Show resolved Hide resolved

// Add context specifying correlationId has same context as its left child
correlTableMap.put(e.getCorrelationId(), leftResult.qualifiedContext());

final Result rightResult = visitChild(1, e.getRight());
final Result rightResult = visitChild(1, e.getRight()).resetAlias();

SqlNode rightSqlNode = rightResult.asFrom();

Expand Down Expand Up @@ -348,6 +358,31 @@ public Result visit(Uncollect e) {
ImmutableMap.of(projectResult.neededAlias, e.getRowType()));
}

/**
* Override this method to avoid the duplicated alias for {@link org.apache.calcite.rel.logical.LogicalValues}.
* The original {@link org.apache.calcite.rel.rel2sql.SqlImplementor.Result#neededAlias} is `t`, we override
* it to be `null`.
* So that for the input SQL like `SELECT 1`, the translated SQL will be like:
*
* SELECT 1
* FROM (VALUES (0)) t (ZERO)
*
* Without this override, the translated SQL contains duplicated alias `t`:
*
* SELECT 1
* FROM (VALUES (0)) t (ZERO) t
*
* which is wrong.
*
* TODO: Identify and backport the fix from apache-calcite to linkedin-calcite since the duplicated alias issue only happens in linkedin-calcite
*/
@Override
public Result visit(Values e) {
ljfgem marked this conversation as resolved.
Show resolved Hide resolved
final Result originalResult = super.visit(e);
return new Result(originalResult.node, originalResult.clauses, null, originalResult.neededType,
ljfgem marked this conversation as resolved.
Show resolved Hide resolved
originalResult.aliases);
}

private SqlNode generateRightChildForSqlJoinWithLateralViews(BiRel e, Result rightResult) {
SqlNode rightSqlNode = rightResult.asFrom();

Expand Down
174 changes: 88 additions & 86 deletions coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ public void testNoSchemaEvolution() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql =
"" + "SELECT *\n" + "FROM fuzzy_union.tablea\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablea";
String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tablea tablea\n" + "UNION ALL\n" + "SELECT *\n"
+ "FROM fuzzy_union.tablea tablea0";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -57,8 +57,9 @@ public void testNoSchemaEvolutionWithMultipleTables() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql = "" + "SELECT *\n" + "FROM (SELECT *\n" + "FROM fuzzy_union.tablea\n" + "UNION ALL\n"
+ "SELECT *\n" + "FROM fuzzy_union.tablea)\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablea";
String expectedSql =
"SELECT *\n" + "FROM (SELECT *\n" + "FROM fuzzy_union.tablea tablea\n" + "UNION ALL\n" + "SELECT *\n"
+ "FROM fuzzy_union.tablea tablea0) t\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablea tablea1";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -71,8 +72,8 @@ public void testNoSchemaEvolutionWithAlias() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql =
"" + "SELECT *\n" + "FROM fuzzy_union.tablea\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablea";
String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tablea tablea\n" + "UNION ALL\n" + "SELECT *\n"
+ "FROM fuzzy_union.tablea tablea0";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -85,8 +86,8 @@ public void testSingleBranchSchemaEvolution() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tableb\n" + "UNION ALL\n"
+ "SELECT a, generic_project(b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tablec";
String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tableb tableb\n" + "UNION ALL\n"
+ "SELECT tablec.a, generic_project(tablec.b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tablec tablec";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -105,8 +106,8 @@ public void testDoubleBranchSameSchemaEvolution() {
// This query currently does not have any generic_projections despite the top level view schema being inconsistent
// because the schemas of the branches evolved the same way.
// This unit test illustrates this behaviour; however, we can re-evaluate our desired behaviour later on.
String expectedSql =
"SELECT *\n" + "FROM fuzzy_union.tabled\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablee";
String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tabled tabled\n" + "UNION ALL\n" + "SELECT *\n"
+ "FROM fuzzy_union.tablee tablee";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -119,8 +120,9 @@ public void testDoubleBranchDifferentSchemaEvolution() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql = "SELECT a, generic_project(b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tablef\n"
+ "UNION ALL\n" + "SELECT a, generic_project(b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tableg";
String expectedSql = "SELECT tablef.a, generic_project(tablef.b, 'struct<b1:string>') b\n"
+ "FROM fuzzy_union.tablef tablef\n" + "UNION ALL\n"
+ "SELECT tableg.a, generic_project(tableg.b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tableg tableg";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -133,10 +135,11 @@ public void testMoreThanTwoBranchesSchemaEvolution() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql =
"SELECT *\n" + "FROM (SELECT a, generic_project(b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tablef\n"
+ "UNION ALL\n" + "SELECT a, generic_project(b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tableg)\n"
+ "UNION ALL\n" + "SELECT a, generic_project(b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tablef";
String expectedSql = "SELECT *\n" + "FROM (SELECT tablef.a, generic_project(tablef.b, 'struct<b1:string>') b\n"
+ "FROM fuzzy_union.tablef tablef\n" + "UNION ALL\n"
+ "SELECT tableg.a, generic_project(tableg.b, 'struct<b1:string>') b\n" + "FROM fuzzy_union.tableg tableg) t1\n"
+ "UNION ALL\n" + "SELECT tablef0.a, generic_project(tablef0.b, 'struct<b1:string>') b\n"
+ "FROM fuzzy_union.tablef tablef0";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -149,8 +152,8 @@ public void testMapWithStructValueSchemaEvolution() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql = "SELECT a, generic_project(b, 'map<string,struct<b1:string>>') b\n"
+ "FROM fuzzy_union.tableh\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablei";
String expectedSql = "SELECT tableh.a, generic_project(tableh.b, 'map<string,struct<b1:string>>') b\n"
+ "FROM fuzzy_union.tableh tableh\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablei tablei";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -163,8 +166,8 @@ public void testArrayWithStructValueSchemaEvolution() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql = "SELECT a, generic_project(b, 'array<struct<b1:string>>') b\n" + "FROM fuzzy_union.tablej\n"
+ "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablek";
String expectedSql = "SELECT tablej.a, generic_project(tablej.b, 'array<struct<b1:string>>') b\n"
+ "FROM fuzzy_union.tablej tablej\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablek tablek";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -178,8 +181,8 @@ public void testDeeplyNestedStructSchemaEvolution() {
String expandedSql = coralSpark.getSparkSql();

String expectedSql =
"SELECT a, generic_project(b, 'struct<b1:string,b2:struct<b3:string,b4:struct<b5:string>>>') b\n"
+ "FROM fuzzy_union.tablel\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablem";
"SELECT tablel.a, generic_project(tablel.b, 'struct<b1:string,b2:struct<b3:string,b4:struct<b5:string>>>') b\n"
+ "FROM fuzzy_union.tablel tablel\n" + "UNION ALL\n" + "SELECT *\n" + "FROM fuzzy_union.tablem tablem";

assertEquals(expandedSql, expectedSql);
}
Expand All @@ -192,8 +195,9 @@ public void testSameSchemaEvolutionWithDifferentOrdering() {
CoralSpark coralSpark = CoralSpark.create(relNode);
String expandedSql = coralSpark.getSparkSql();

String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tablen\n" + "UNION ALL\n"
+ "SELECT a, generic_project(b, 'struct<b2:double,b1:string,b0:int>') b\n" + "FROM fuzzy_union.tableo";
String expectedSql = "SELECT *\n" + "FROM fuzzy_union.tablen tablen\n" + "UNION ALL\n"
+ "SELECT tableo.a, generic_project(tableo.b, 'struct<b2:double,b1:string,b0:int>') b\n"
+ "FROM fuzzy_union.tableo tableo";

assertEquals(expandedSql, expectedSql);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ private boolean isTrinoSupportedUnnest(Uncollect uncollect) {
public Result visit(Correlate e) {
final Result leftResult = visitChild(0, e.getLeft()).resetAlias(e.getCorrelVariable(), e.getLeft().getRowType());
parseCorrelTable(e, leftResult);
final Result rightResult = visitChild(1, e.getRight());
final Result rightResult = visitChild(1, e.getRight()).resetAlias();
SqlNode rightLateral = rightResult.node;
if (rightLateral.getKind() != SqlKind.AS) {
// LATERAL is only needed in Trino if it's not an AS node.
Expand All @@ -280,6 +280,27 @@ public Result visit(Correlate e) {
return result(join, leftResult, rightResult);
}

/**
* Override this method to avoid the duplicated alias for {@link org.apache.calcite.rel.logical.LogicalValues}.
* So that for the input SQL like `SELECT 1`, the translated SQL will be like:
*
* SELECT 1
* FROM (VALUES (0)) t (ZERO)
*
* Without this override, the translated SQL contains duplicated alias `t`:
*
* SELECT 1
* FROM (VALUES (0)) t (ZERO) t
*
* which is wrong.
*/
@Override
public Result visit(Values e) {
final Result originalResult = super.visit(e);
return new Result(originalResult.node, originalResult.clauses, null, originalResult.neededType,
originalResult.aliases);
}
ljfgem marked this conversation as resolved.
Show resolved Hide resolved

@Override
public Context aliasContext(Map<String, RelDataType> aliases, boolean qualified) {
// easier to keep inner class for accessing 'aliases' and 'qualified' variables as closure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ public boolean supportsCharSet() {
return false;
}

/**
* Override this method so that there will be explicit and correct table alias for selected fields, which is necessary for
* data type derivation on SqlNodes
*/
@Override
public boolean hasImplicitTableAlias() {
return false;
}

public void unparseOffsetFetch(SqlWriter writer, SqlNode offset, SqlNode fetch) {
unparseFetchUsingLimit(writer, offset, fetch);
}
Expand Down
Loading