Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] Support Generated Column rewrite for logical view with customize column name #52981

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -165,18 +165,35 @@ private void reAnalyzeExpressionBasedOnCurrentScope(SelectRelation childSelectRe
}

// 2. rewrite(rename slotRef) generated column expression(unAnalyzed) using alias in current scope
Map<String, String> slotRefToAlias = new HashMap<>();
// For view relation, column alias may recorded in scope but not SelectListItem in SelectList
Map<String, String> viewRefAlias = new HashMap<>();
if (scope.getRelationId().getSourceNode() instanceof ViewRelation) {
for (Field f : scope.getRelationFields().getAllFields()) {
if (f.getOriginExpression() instanceof SlotRef) {
viewRefAlias.put(((SlotRef) f.getOriginExpression()).getColumnName(), f.getName());
}
}
}

Map<SlotRef, String> slotRefToAlias = new HashMap<>();
for (SelectListItem item : childSelectRelation.getSelectList().getItems()) {
if (item.isStar()) {
slotRefToAlias.clear();
break;
}

if (!(item.getExpr() instanceof SlotRef) || (item.getAlias() == null || item.getAlias().isEmpty())) {
if (!(item.getExpr() instanceof SlotRef)) {
continue;
}

slotRefToAlias.put(((SlotRef) item.getExpr()).toSql(), item.getAlias());
if (!viewRefAlias.isEmpty()) {
String nameKey = ((SlotRef) item.getExpr()).getColumnName();
if (viewRefAlias.containsKey(nameKey)) {
slotRefToAlias.put((SlotRef) item.getExpr(), viewRefAlias.get(nameKey));
}
} else if (item.getAlias() != null && !item.getAlias().isEmpty()) {
slotRefToAlias.put((SlotRef) item.getExpr(), item.getAlias());
}
}
List<SlotRef> allRefSlotRefs = new ArrayList<>();
for (Map.Entry<Expr, SlotRef> entry : generatedExprToColumnRef.entrySet()) {
Expand All @@ -188,7 +205,7 @@ private void reAnalyzeExpressionBasedOnCurrentScope(SelectRelation childSelectRe
}
for (SlotRef slotRef : allRefSlotRefs) {
if (!slotRefToAlias.isEmpty()) {
String alias = slotRefToAlias.get(slotRef.toSql());
String alias = slotRefToAlias.get(slotRef);
if (alias != null) {
slotRef.setColumnName(alias);
}
Expand Down Expand Up @@ -223,7 +240,7 @@ public Void visitTable(TableRelation tableRelation, Scope scope) {
for (Column column : table.getBaseSchema()) {
Expr generatedColumnExpression = column.getGeneratedColumnExpr(table.getIdToColumn());
if (generatedColumnExpression != null) {
SlotRef slotRef = new SlotRef(null, column.getName());
SlotRef slotRef = new SlotRef(null, column.getName(), column.getName());
ExpressionAnalyzer.analyzeExpression(generatedColumnExpression, new AnalyzeState(), scope, session);
ExpressionAnalyzer.analyzeExpression(slotRef, new AnalyzeState(), scope, session);
generatedExprToColumnRef.put(generatedColumnExpression, slotRef);
Expand Down Expand Up @@ -262,6 +279,10 @@ public Void visitJoin(JoinRelation joinRelation, Scope scope) {
public Void visitView(ViewRelation node, Scope scope) {
QueryRelation queryRelation = node.getQueryStatement().getQueryRelation();
if (queryRelation instanceof SubqueryRelation) {
// use scope for view relation to analyze subqueryRelation, clear the pre-analyzed
// information first.
queryRelation.getGeneratedExprToColumnRef().clear();
visitSubquery((SubqueryRelation) queryRelation, scope);
node.setGeneratedExprToColumnRef(queryRelation.getGeneratedExprToColumnRef());
} else if (queryRelation instanceof SelectRelation) {
SelectRelation childSelectRelation = (SelectRelation) queryRelation;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The most risky bug in this code is:
Potential ClassCastException due to unsafe casting.

You can modify the code like this:

// Check instance of expected class before casting
Map<String, String> viewRefAlias = new HashMap<>();
if (scope.getRelationId().getSourceNode() instanceof ViewRelation) {
    for (Field f : scope.getRelationFields().getAllFields()) {
        if (f.getOriginExpression() instanceof SlotRef) {
            viewRefAlias.put(((SlotRef) f.getOriginExpression()).getColumnName(), f.getName());
        }
    }
}

Map<SlotRef, String> slotRefToAlias = new HashMap<>();
for (SelectListItem item : childSelectRelation.getSelectList().getItems()) {
    if (item.isStar()) {
        slotRefToAlias.clear();
        break;
    }

    if (!(item.getExpr() instanceof SlotRef)) {
        continue;
    }

    SlotRef exprSlotRef = (SlotRef) item.getExpr();
    
    if (!viewRefAlias.isEmpty()) {
        String nameKey = exprSlotRef.getColumnName();
        if (viewRefAlias.containsKey(nameKey)) {
            slotRefToAlias.put(exprSlotRef, viewRefAlias.get(nameKey));
        }
    } else if (item.getAlias() != null && !item.getAlias().isEmpty()) {
        slotRefToAlias.put(exprSlotRef, item.getAlias());
    }
}

This change ensures the cast is safe by storing the result of the cast in a variable and checking before using it further.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ public static RelationId of(Relation sourceNode) {
return new RelationId(requireNonNull(sourceNode, "source cannot be null"));
}

public Relation getSourceNode() {
return sourceNode;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand Down
84 changes: 84 additions & 0 deletions test/sql/test_materialized_column/R/test_generated_column_rewrite
Original file line number Diff line number Diff line change
Expand Up @@ -707,4 +707,88 @@ None
-- !result
DROP VIEW v2;
-- result:
-- !result
-- name: test_view_analyzed_rewrite_failure
CREATE TABLE `t_view_analyzed_rewrite_failure_1` (
`id_1` bigint(20) NOT NULL COMMENT "",
`col_2` STRING AS CONCAT(CAST(id_1 AS STRING), "_abc")
) ENGINE=OLAP
DUPLICATE KEY(`id_1`)
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false",
"enable_persistent_index" = "false",
"replicated_storage" = "true",
"fast_schema_evolution" = "true",
"compression" = "LZ4"
);
-- result:
-- !result
CREATE TABLE `t_view_analyzed_rewrite_failure_2` (
`id_3` bigint(20) NOT NULL COMMENT "",
`col_4` STRING AS CONCAT(CAST(id_3 AS STRING), "_abc")
) ENGINE=OLAP
DUPLICATE KEY(`id_3`)
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false",
"enable_persistent_index" = "false",
"replicated_storage" = "true",
"fast_schema_evolution" = "true",
"compression" = "LZ4"
);
-- result:
-- !result
INSERT INTO t_view_analyzed_rewrite_failure_1 VALUES (1);
-- result:
-- !result
INSERT INTO t_view_analyzed_rewrite_failure_2 VALUES (1);
-- result:
-- !result
INSERT INTO t_view_analyzed_rewrite_failure_2 VALUES (2);
-- result:
-- !result
CREATE VIEW v1 (v1_col1, v1_col2, v1_col3, v1_col4) as
WITH tmp as (SELECT id_1, col_2 from `t_view_analyzed_rewrite_failure_1`)
select tmp.id_1, tmp.col_2, t_view_analyzed_rewrite_failure_2.id_3, t_view_analyzed_rewrite_failure_2.col_4 from tmp, t_view_analyzed_rewrite_failure_2 where
CONCAT(CAST(tmp.id_1 AS STRING), "_abc") = CONCAT(CAST(t_view_analyzed_rewrite_failure_2.id_3 AS STRING), "_abc");
-- result:
-- !result
CREATE VIEW v2 (v1_col1, v1_col2, v1_col3, v1_col4) as
(WITH tmp as (SELECT id_1, col_2 from `t_view_analyzed_rewrite_failure_1`)
select tmp.id_1, tmp.col_2, t_view_analyzed_rewrite_failure_2.id_3, t_view_analyzed_rewrite_failure_2.col_4 from tmp, t_view_analyzed_rewrite_failure_2 where
CONCAT(CAST(tmp.id_1 AS STRING), "_abc") = CONCAT(CAST(t_view_analyzed_rewrite_failure_2.id_3 AS STRING), "_abc"));
-- result:
-- !result
CREATE VIEW v3 (v1_col1, v1_col2, v1_col3, v1_col4) as
WITH tmp as (SELECT id_1, col_2 from `t_view_analyzed_rewrite_failure_1`)
select tmp.id_1 AS new_col1, tmp.col_2 AS new_col2, t_view_analyzed_rewrite_failure_2.id_3 AS new_col3, t_view_analyzed_rewrite_failure_2.col_4 AS new_col4 from tmp, t_view_analyzed_rewrite_failure_2 where
CONCAT(CAST(tmp.id_1 AS STRING), "_abc") = CONCAT(CAST(t_view_analyzed_rewrite_failure_2.id_3 AS STRING), "_abc");
-- result:
-- !result
function: assert_explain_not_contains('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v1', "abc")
-- result:
None
-- !result
function: assert_is_identical_explain_plan('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v1', "SELECT v1_col2 from v1")
-- result:
None
-- !result
function: assert_explain_not_contains('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v2', "abc")
-- result:
None
-- !result
function: assert_is_identical_explain_plan('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v2', "SELECT v1_col2 from v2")
-- result:
None
-- !result
function: assert_explain_not_contains('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v3', "abc")
-- result:
None
-- !result
function: assert_is_identical_explain_plan('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v3', "SELECT v1_col2 from v3")
-- result:
None
-- !result
59 changes: 59 additions & 0 deletions test/sql/test_materialized_column/T/test_generated_column_rewrite
Original file line number Diff line number Diff line change
Expand Up @@ -409,3 +409,62 @@ DROP VIEW v1;
CREATE VIEW v2 AS WITH tmp AS (SELECT t1.k AS col1, t1.v AS col2, t2.k AS col3, t2.v AS col4 FROM t1, t2) SELECT tmp.col1 + 10, tmp.col3 + 10 FROM tmp;
function: assert_explain_contains('SELECT * FROM v2', "k + 10")
DROP VIEW v2;

-- name: test_view_analyzed_rewrite_failure
CREATE TABLE `t_view_analyzed_rewrite_failure_1` (
`id_1` bigint(20) NOT NULL COMMENT "",
`col_2` STRING AS CONCAT(CAST(id_1 AS STRING), "_abc")
) ENGINE=OLAP
DUPLICATE KEY(`id_1`)
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false",
"enable_persistent_index" = "false",
"replicated_storage" = "true",
"fast_schema_evolution" = "true",
"compression" = "LZ4"
);

CREATE TABLE `t_view_analyzed_rewrite_failure_2` (
`id_3` bigint(20) NOT NULL COMMENT "",
`col_4` STRING AS CONCAT(CAST(id_3 AS STRING), "_abc")
) ENGINE=OLAP
DUPLICATE KEY(`id_3`)
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false",
"enable_persistent_index" = "false",
"replicated_storage" = "true",
"fast_schema_evolution" = "true",
"compression" = "LZ4"
);

INSERT INTO t_view_analyzed_rewrite_failure_1 VALUES (1);
INSERT INTO t_view_analyzed_rewrite_failure_2 VALUES (1);
INSERT INTO t_view_analyzed_rewrite_failure_2 VALUES (2);

CREATE VIEW v1 (v1_col1, v1_col2, v1_col3, v1_col4) as
WITH tmp as (SELECT id_1, col_2 from `t_view_analyzed_rewrite_failure_1`)
select tmp.id_1, tmp.col_2, t_view_analyzed_rewrite_failure_2.id_3, t_view_analyzed_rewrite_failure_2.col_4 from tmp, t_view_analyzed_rewrite_failure_2 where
CONCAT(CAST(tmp.id_1 AS STRING), "_abc") = CONCAT(CAST(t_view_analyzed_rewrite_failure_2.id_3 AS STRING), "_abc");

CREATE VIEW v2 (v1_col1, v1_col2, v1_col3, v1_col4) as
(WITH tmp as (SELECT id_1, col_2 from `t_view_analyzed_rewrite_failure_1`)
select tmp.id_1, tmp.col_2, t_view_analyzed_rewrite_failure_2.id_3, t_view_analyzed_rewrite_failure_2.col_4 from tmp, t_view_analyzed_rewrite_failure_2 where
CONCAT(CAST(tmp.id_1 AS STRING), "_abc") = CONCAT(CAST(t_view_analyzed_rewrite_failure_2.id_3 AS STRING), "_abc"));

CREATE VIEW v3 (v1_col1, v1_col2, v1_col3, v1_col4) as
WITH tmp as (SELECT id_1, col_2 from `t_view_analyzed_rewrite_failure_1`)
select tmp.id_1 AS new_col1, tmp.col_2 AS new_col2, t_view_analyzed_rewrite_failure_2.id_3 AS new_col3, t_view_analyzed_rewrite_failure_2.col_4 AS new_col4 from tmp, t_view_analyzed_rewrite_failure_2 where
CONCAT(CAST(tmp.id_1 AS STRING), "_abc") = CONCAT(CAST(t_view_analyzed_rewrite_failure_2.id_3 AS STRING), "_abc");

function: assert_explain_not_contains('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v1', "abc")
function: assert_is_identical_explain_plan('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v1', "SELECT v1_col2 from v1")

function: assert_explain_not_contains('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v2', "abc")
function: assert_is_identical_explain_plan('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v2', "SELECT v1_col2 from v2")

function: assert_explain_not_contains('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v3', "abc")
function: assert_is_identical_explain_plan('SELECT CONCAT(CAST(v1_col1 AS STRING), "_abc") from v3', "SELECT v1_col2 from v3")
Loading