From 5d9a975e9d60c18cdae9abf269321745d75f2a35 Mon Sep 17 00:00:00 2001 From: "Michael S. Molina" <70410625+michael-s-molina@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:22:57 -0500 Subject: [PATCH] refactor: Updates some database columns to MediumText (#27119) --- UPDATING.md | 2 + superset/connectors/sqla/models.py | 2 +- ..._17fcea065655_change_text_to_mediumtext.py | 87 +++++++++++++++++++ superset/models/annotations.py | 5 +- superset/models/core.py | 4 +- superset/models/dashboard.py | 2 +- superset/models/helpers.py | 3 +- superset/models/slice.py | 4 +- superset/models/sql_lab.py | 12 +-- superset/reports/models.py | 13 ++- 10 files changed, 111 insertions(+), 23 deletions(-) create mode 100644 superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py diff --git a/UPDATING.md b/UPDATING.md index fec78be67d8f8..8f5785ac8c97d 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -24,6 +24,8 @@ assists people when migrating to a new version. ## Next +- [27119](https://github.com/apache/superset/pull/27119): Updates various database columns to use the `MediumText` type, potentially requiring a table lock on MySQL databases or taking some time to complete on large deployments. + - [26450](https://github.com/apache/superset/pull/26450): Deprecates the `KV_STORE` feature flag and its related assets such as the API endpoint and `keyvalue` table. The main dependency of this feature is the `SHARE_QUERIES_VIA_KV_STORE` feature flag which allows sharing SQL Lab queries without the necessity of saving the query. Our intention is to use the permalink feature to implement this use case before 5.0 and that's why we are deprecating the feature flag now. 
### Breaking Changes diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 2552740695f62..089b9c2f28960 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -2116,4 +2116,4 @@ class RowLevelSecurityFilter(Model, AuditMixinNullable): secondary=RLSFilterTables, backref="row_level_security_filters", ) - clause = Column(Text, nullable=False) + clause = Column(MediumText(), nullable=False) diff --git a/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py b/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py new file mode 100644 index 0000000000000..e63ab6ac5644a --- /dev/null +++ b/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""change_text_to_mediumtext + +Revision ID: 17fcea065655 +Revises: 87d38ad83218 +Create Date: 2024-02-14 14:43:39.898093 + +""" + +# revision identifiers, used by Alembic. 
+revision = "17fcea065655" +down_revision = "87d38ad83218" + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects.mysql.base import MySQLDialect + +from superset.utils.core import MediumText + +TABLE_COLUMNS = [ + "annotation.json_metadata", + "css_templates.css", + "dashboards.css", + "keyvalue.value", + "query.extra_json", + "query.executed_sql", + "query.select_sql", + "report_execution_log.value_row_json", + "report_recipient.recipient_config_json", + "report_schedule.sql", + "report_schedule.last_value_row_json", + "report_schedule.validator_config_json", + "report_schedule.extra_json", + "row_level_security_filters.clause", + "saved_query.sql", + "saved_query.extra_json", + "sl_columns.extra_json", + "sl_datasets.extra_json", + "sl_tables.extra_json", + "slices.params", + "slices.query_context", + "ssh_tunnels.extra_json", + "tab_state.extra_json", + "tab_state.sql", + "table_schema.extra_json", +] + +NOT_NULL_COLUMNS = ["keyvalue.value", "row_level_security_filters.clause"] + + +def upgrade(): + if isinstance(op.get_bind().dialect, MySQLDialect): + for column in TABLE_COLUMNS: + with op.batch_alter_table(column.split(".")[0]) as batch_op: + batch_op.alter_column( + column.split(".")[1], + existing_type=sa.Text(), + type_=MediumText(), + existing_nullable=column not in NOT_NULL_COLUMNS, + ) + + +def downgrade(): + if isinstance(op.get_bind().dialect, MySQLDialect): + for column in TABLE_COLUMNS: + with op.batch_alter_table(column.split(".")[0]) as batch_op: + batch_op.alter_column( + column.split(".")[1], + existing_type=MediumText(), + type_=sa.Text(), + existing_nullable=column not in NOT_NULL_COLUMNS, + ) diff --git a/superset/models/annotations.py b/superset/models/annotations.py index 54de94e7f6457..d8b6f8b1fa735 100644 --- a/superset/models/annotations.py +++ b/superset/models/annotations.py @@ -22,10 +22,10 @@ from sqlalchemy.orm import relationship from superset.models.helpers import AuditMixinNullable +from superset.utils.core 
import MediumText class AnnotationLayer(Model, AuditMixinNullable): - """A logical namespace for a set of annotations""" __tablename__ = "annotation_layer" @@ -38,7 +38,6 @@ def __repr__(self) -> str: class Annotation(Model, AuditMixinNullable): - """Time-related annotation""" __tablename__ = "annotation" @@ -49,7 +48,7 @@ class Annotation(Model, AuditMixinNullable): short_descr = Column(String(500)) long_descr = Column(Text) layer = relationship(AnnotationLayer, backref="annotation") - json_metadata = Column(Text) + json_metadata = Column(MediumText()) __table_args__ = (Index("ti_dag_state", layer_id, start_dttm, end_dttm),) diff --git a/superset/models/core.py b/superset/models/core.py index a8d0cdeb517ba..71a6e9d04237b 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -96,7 +96,7 @@ class KeyValue(Model): # pylint: disable=too-few-public-methods __tablename__ = "keyvalue" id = Column(Integer, primary_key=True) - value = Column(Text, nullable=False) + value = Column(utils.MediumText(), nullable=False) class CssTemplate(Model, AuditMixinNullable): @@ -105,7 +105,7 @@ class CssTemplate(Model, AuditMixinNullable): __tablename__ = "css_templates" id = Column(Integer, primary_key=True) template_name = Column(String(250)) - css = Column(Text, default="") + css = Column(utils.MediumText(), default="") class ConfigurationMethod(StrEnum): diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 01b1bf9624a54..5570e892ff7d6 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -137,7 +137,7 @@ class Dashboard(AuditMixinNullable, ImportExportMixin, Model): dashboard_title = Column(String(500)) position_json = Column(utils.MediumText()) description = Column(Text) - css = Column(Text) + css = Column(utils.MediumText()) certified_by = Column(Text) certification_details = Column(Text) json_metadata = Column(utils.MediumText()) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 
fb2f959f31b99..8d3ed36c465f7 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -89,6 +89,7 @@ get_column_name, get_user_id, is_adhoc_column, + MediumText, remove_duplicates, ) from superset.utils.dates import datetime_to_epoch @@ -584,7 +585,7 @@ def __init__( # pylint: disable=too-many-arguments class ExtraJSONMixin: """Mixin to add an `extra` column (JSON) and utility methods""" - extra_json = sa.Column(sa.Text, default="{}") + extra_json = sa.Column(MediumText(), default="{}") @property def extra(self) -> dict[str, Any]: diff --git a/superset/models/slice.py b/superset/models/slice.py index b41bb72a85496..eb2b220c8ff3e 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -78,8 +78,8 @@ class Slice( # pylint: disable=too-many-public-methods datasource_type = Column(String(200)) datasource_name = Column(String(2000)) viz_type = Column(String(250)) - params = Column(Text) - query_context = Column(Text) + params = Column(utils.MediumText()) + query_context = Column(utils.MediumText()) description = Column(Text) cache_timeout = Column(Integer) perm = Column(String(1000)) diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py index a0e9fa6b6eb0a..f4724d6dabb69 100644 --- a/superset/models/sql_lab.py +++ b/superset/models/sql_lab.py @@ -55,7 +55,7 @@ ) from superset.sql_parse import CtasMethod, ParsedQuery, Table from superset.sqllab.limiting_factor import LimitingFactor -from superset.utils.core import get_column_name, QueryStatus, user_label +from superset.utils.core import get_column_name, MediumText, QueryStatus, user_label if TYPE_CHECKING: from superset.connectors.sqla.models import TableColumn @@ -88,11 +88,11 @@ class Query( tab_name = Column(String(256)) sql_editor_id = Column(String(256)) schema = Column(String(256)) - sql = Column(Text) + sql = Column(MediumText()) # Query to retrieve the results, # used only in case of select_as_cta_used is true. 
- select_sql = Column(Text) - executed_sql = Column(Text) + select_sql = Column(MediumText()) + executed_sql = Column(MediumText()) # Could be configured in the superset config. limit = Column(Integer) limiting_factor = Column( @@ -365,7 +365,7 @@ class SavedQuery(AuditMixinNullable, ExtraJSONMixin, ImportExportMixin, Model): schema = Column(String(128)) label = Column(String(256)) description = Column(Text) - sql = Column(Text) + sql = Column(MediumText()) template_parameters = Column(Text) user = relationship( security_manager.user_model, @@ -467,7 +467,7 @@ class TabState(AuditMixinNullable, ExtraJSONMixin, Model): ) # the query in the textarea, and results (if any) - sql = Column(Text) + sql = Column(MediumText()) query_limit = Column(Integer) # latest query that was run diff --git a/superset/reports/models.py b/superset/reports/models.py index 59135cda6cbae..022db5dc6fce6 100644 --- a/superset/reports/models.py +++ b/superset/reports/models.py @@ -41,6 +41,7 @@ from superset.models.slice import Slice from superset.reports.types import ReportScheduleExtra from superset.utils.backports import StrEnum +from superset.utils.core import MediumText metadata = Model.metadata # pylint: disable=no-member @@ -108,7 +109,6 @@ class ReportSourceFormat(StrEnum): class ReportSchedule(AuditMixinNullable, ExtraJSONMixin, Model): - """ Report Schedules, supports alerts and reports """ @@ -128,7 +128,7 @@ class ReportSchedule(AuditMixinNullable, ExtraJSONMixin, Model): ) timezone = Column(String(100), default="UTC", nullable=False) report_format = Column(String(50), default=ReportDataFormat.VISUALIZATION) - sql = Column(Text()) + sql = Column(MediumText()) # (Alerts/Reports) M-O to chart chart_id = Column(Integer, ForeignKey("slices.id"), nullable=True) chart = relationship(Slice, backref="report_schedules", foreign_keys=[chart_id]) @@ -150,11 +150,11 @@ class ReportSchedule(AuditMixinNullable, ExtraJSONMixin, Model): last_eval_dttm = Column(DateTime) last_state = 
Column(String(50), default=ReportState.NOOP) last_value = Column(Float) - last_value_row_json = Column(Text) + last_value_row_json = Column(MediumText()) # (Alerts) Observed value validation related columns validator_type = Column(String(100)) - validator_config_json = Column(Text, default="{}") + validator_config_json = Column(MediumText(), default="{}") # Log retention log_retention = Column(Integer, default=90) @@ -187,7 +187,7 @@ class ReportRecipients(Model, AuditMixinNullable): __tablename__ = "report_recipient" id = Column(Integer, primary_key=True) type = Column(String(50), nullable=False) - recipient_config_json = Column(Text, default="{}") + recipient_config_json = Column(MediumText(), default="{}") report_schedule_id = Column( Integer, ForeignKey("report_schedule.id"), nullable=False ) @@ -203,7 +203,6 @@ class ReportRecipients(Model, AuditMixinNullable): class ReportExecutionLog(Model): # pylint: disable=too-few-public-methods - """ Report Execution Log, hold the result of the report execution with timestamps, last observation and possible error messages @@ -220,7 +219,7 @@ class ReportExecutionLog(Model): # pylint: disable=too-few-public-methods # (Alerts) Observed values value = Column(Float) - value_row_json = Column(Text) + value_row_json = Column(MediumText()) state = Column(String(50), nullable=False) error_message = Column(Text)