Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(snowflake): opt-in denormalization of column names #24982

Merged
merged 6 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions superset-frontend/src/components/Datasource/DatasourceEditor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,7 @@ class DatasourceEditor extends React.PureComponent {
table_name: datasource.table_name
? encodeURIComponent(datasource.table_name)
: datasource.table_name,
normalize_columns: datasource.normalize_columns,
};
Object.entries(params).forEach(([key, value]) => {
// rison can't encode the undefined value
Expand Down Expand Up @@ -993,6 +994,15 @@ class DatasourceEditor extends React.PureComponent {
control={<TextControl controlId="template_params" />}
/>
)}
<Field
inline
fieldKey="normalize_columns"
label={t('Normalize column names')}
description={t(
'Allow column names to be changed to case insensitive format',
)}
control={<CheckboxControl controlId="normalize_columns" />}
/>
</Fieldset>
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ const DatasourceModal: FunctionComponent<DatasourceModalProps> = ({
schema,
description: currentDatasource.description,
main_dttm_col: currentDatasource.main_dttm_col,
normalize_columns: currentDatasource.normalize_columns,
offset: currentDatasource.offset,
default_endpoint: currentDatasource.default_endpoint,
cache_timeout:
Expand Down
1 change: 1 addition & 0 deletions superset-frontend/src/features/datasets/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@ export type DatasetObject = {
metrics: MetricObject[];
extra?: string;
is_managed_externally: boolean;
normalize_columns: boolean;
};
3 changes: 3 additions & 0 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ class SqlaTable(
is_sqllab_view = Column(Boolean, default=False)
template_params = Column(Text)
extra = Column(Text)
normalize_columns = Column(Boolean)

baselink = "tablemodelview"

Expand All @@ -564,6 +565,7 @@ class SqlaTable(
"filter_select_enabled",
"fetch_values_predicate",
"extra",
"normalize_columns",
]
update_from_object_fields = [f for f in export_fields if f != "database_id"]
export_parent = "database"
Expand Down Expand Up @@ -717,6 +719,7 @@ def external_metadata(self) -> list[ResultSetColumnType]:
database=self.database,
table_name=self.table_name,
schema_name=self.schema,
normalize_columns=self.normalize_columns,
)

@property
Expand Down
6 changes: 5 additions & 1 deletion superset/connectors/sqla/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
def get_physical_table_metadata(
database: Database,
table_name: str,
normalize_columns: bool,
schema_name: str | None = None,
) -> list[ResultSetColumnType]:
"""Use SQLAlchemy inspector to get table metadata"""
Expand All @@ -67,7 +68,10 @@ def get_physical_table_metadata(
for col in cols:
try:
if isinstance(col["type"], TypeEngine):
name = db_engine_spec.denormalize_name(db_dialect, col["column_name"])
name = col["column_name"]
if not normalize_columns:
name = db_engine_spec.denormalize_name(db_dialect, name)

db_type = db_engine_spec.column_datatype_to_string(
col["type"], db_dialect
)
Expand Down
4 changes: 4 additions & 0 deletions superset/connectors/sqla/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ class TableModelView( # pylint: disable=too-many-ancestors
"is_sqllab_view",
"template_params",
"extra",
"normalize_columns",
]
base_filters = [["id", DatasourceFilter, lambda: []]]
show_columns = edit_columns + ["perm", "slices"]
Expand Down Expand Up @@ -379,6 +380,9 @@ class TableModelView( # pylint: disable=too-many-ancestors
'}, "warning_markdown": "This is a warning." }`.',
True,
),
"normalize_columns": _(
"Allow column names to be changed to case insensitive format"
),
}
label_columns = {
"slices": _("Associated Charts"),
Expand Down
1 change: 1 addition & 0 deletions superset/dashboards/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ class DashboardDatasetSchema(Schema):
verbose_map = fields.Dict(fields.Str(), fields.Str())
time_grain_sqla = fields.List(fields.List(fields.Str()))
granularity_sqla = fields.List(fields.List(fields.Str()))
normalize_columns = fields.Bool()


class BaseDashboardSchema(Schema):
Expand Down
2 changes: 2 additions & 0 deletions superset/datasets/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ class DatasetRestApi(BaseSupersetModelRestApi):
"schema",
"description",
"main_dttm_col",
"normalize_columns",
"offset",
"default_endpoint",
"cache_timeout",
Expand Down Expand Up @@ -219,6 +220,7 @@ class DatasetRestApi(BaseSupersetModelRestApi):
"schema",
"description",
"main_dttm_col",
"normalize_columns",
"offset",
"default_endpoint",
"cache_timeout",
Expand Down
1 change: 1 addition & 0 deletions superset/datasets/commands/duplicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def run(self) -> Model:
table.database = database
table.schema = self._base_model.schema
table.template_params = self._base_model.template_params
table.normalize_columns = self._base_model.normalize_columns
table.is_sqllab_view = True
table.sql = ParsedQuery(self._base_model.sql).stripped()
db.session.add(table)
Expand Down
1 change: 1 addition & 0 deletions superset/datasets/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ class DatasetPutSchema(Schema):
schema = fields.String(allow_none=True, validate=Length(0, 255))
description = fields.String(allow_none=True)
main_dttm_col = fields.String(allow_none=True)
normalize_columns = fields.Boolean(allow_none=True, dump_default=False)
offset = fields.Integer(allow_none=True)
default_endpoint = fields.String(allow_none=True)
cache_timeout = fields.Integer(allow_none=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""add_normalize_columns_to_sqla_model

Revision ID: 9f4a086c2676
Revises: 4448fa6deeb1
Create Date: 2023-08-14 09:38:11.897437

"""

# revision identifiers, used by Alembic.
revision = '9f4a086c2676'
down_revision = '4448fa6deeb1'

from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base

from superset import db
from superset.migrations.shared.utils import paginated_update


Base = declarative_base()

class SqlaTable(Base):
    """Minimal mapped stub of the ``tables`` table for this migration.

    Only the columns this migration touches are declared; the full model
    lives in ``superset.connectors.sqla.models``.
    """

    __tablename__ = "tables"

    # Primary key, needed so the ORM can track rows during the backfill.
    id = sa.Column(sa.Integer, primary_key=True)
    # New opt-in flag added by this revision; backfilled to True in upgrade().
    normalize_columns = sa.Column(sa.Boolean)


def upgrade():
    """Add the ``normalize_columns`` flag to the ``tables`` table.

    The column is added as ``nullable=True`` with a ``default=False`` rather
    than ``NOT NULL`` (mirroring the existing ``is_sqllab_view`` column, as
    SQLAlchemy is unreliable about server defaults combined with a NOT NULL
    constraint — see PR discussion). Existing rows are then backfilled to
    ``True`` so that already-created datasets keep the previous
    column-normalizing behaviour; only datasets created after this migration
    default to the new opt-in (``False``) behaviour.
    """
    op.add_column(
        "tables",
        sa.Column("normalize_columns", sa.Boolean(), nullable=True, default=False),
    )

    bind = op.get_bind()
    session = db.Session(bind=bind)

    # Backfill in pages to avoid loading every dataset row into memory at
    # once on large deployments.
    for table in paginated_update(session.query(SqlaTable)):
        table.normalize_columns = True


def downgrade():
    """Revert this migration by dropping ``normalize_columns`` from ``tables``."""
    op.drop_column("tables", "normalize_columns")
6 changes: 5 additions & 1 deletion superset/views/datasource/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from typing import Any, Optional

from marshmallow import fields, post_load, pre_load, Schema, validate
from typing_extensions import TypedDict
Expand All @@ -29,13 +29,15 @@ class ExternalMetadataParams(TypedDict):
database_name: str
schema_name: str
table_name: str
normalize_columns: Optional[bool]


get_external_metadata_schema = {
"datasource_type": "string",
"database_name": "string",
"schema_name": "string",
"table_name": "string",
"normalize_columns": "boolean",
}


Expand All @@ -44,6 +46,7 @@ class ExternalMetadataSchema(Schema):
database_name = fields.Str(required=True)
schema_name = fields.Str(allow_none=True)
table_name = fields.Str(required=True)
normalize_columns = fields.Bool(allow_none=True)

# pylint: disable=unused-argument
@post_load
Expand All @@ -57,6 +60,7 @@ def normalize(
database_name=data["database_name"],
schema_name=data.get("schema_name", ""),
table_name=data["table_name"],
normalize_columns=data["normalize_columns"],
)


Expand Down
1 change: 1 addition & 0 deletions superset/views/datasource/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def external_metadata_by_name(self, **kwargs: Any) -> FlaskResponse:
database=database,
table_name=params["table_name"],
schema_name=params["schema_name"],
normalize_columns=params.get("normalize_columns", False),
)
except (NoResultFound, NoSuchTableError) as ex:
raise DatasetNotFoundError() from ex
Expand Down
5 changes: 5 additions & 0 deletions tests/integration_tests/datasource_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def test_external_metadata_by_name_for_physical_table(self):
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
"normalize_columns": tbl.normalize_columns,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some tests that create a dataset with/without this value to check for api backwards compatibility?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, I'll do that 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
Expand Down Expand Up @@ -133,6 +134,7 @@ def test_external_metadata_by_name_for_virtual_table(self):
"database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
"normalize_columns": tbl.normalize_columns,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
Expand All @@ -151,6 +153,7 @@ def test_external_metadata_by_name_from_sqla_inspector(self):
"database_name": example_database.database_name,
"table_name": "test_table",
"schema_name": get_example_default_schema(),
"normalize_columns": False,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
Expand All @@ -164,6 +167,7 @@ def test_external_metadata_by_name_from_sqla_inspector(self):
"datasource_type": "table",
"database_name": "foo",
"table_name": "bar",
"normalize_columns": False,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
Expand All @@ -180,6 +184,7 @@ def test_external_metadata_by_name_from_sqla_inspector(self):
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "fooooooooobarrrrrr",
"normalize_columns": False,
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
Expand Down
2 changes: 2 additions & 0 deletions tests/unit_tests/datasets/commands/export_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def test_export(session: Session) -> None:
is_sqllab_view=0, # no longer used?
template_params=json.dumps({"answer": "42"}),
schema_perm=None,
normalize_columns=False,
extra=json.dumps({"warning_markdown": "*WARNING*"}),
)

Expand Down Expand Up @@ -108,6 +109,7 @@ def test_export(session: Session) -> None:
fetch_values_predicate: foo IN (1, 2)
extra:
warning_markdown: '*WARNING*'
normalize_columns: false
uuid: null
metrics:
- metric_name: cnt
Expand Down