From 39ba332999b1ad06686aa54b7fe399334b3c0528 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 1 Feb 2024 15:51:43 +0000 Subject: [PATCH 01/17] alembic sync migration --- .../versions/fd2a890159e9__alembic_sync.py | 354 ++++++++++++++++++ 1 file changed, 354 insertions(+) create mode 100644 backend/migrations/versions/fd2a890159e9__alembic_sync.py diff --git a/backend/migrations/versions/fd2a890159e9__alembic_sync.py b/backend/migrations/versions/fd2a890159e9__alembic_sync.py new file mode 100644 index 000000000..9b1ee45be --- /dev/null +++ b/backend/migrations/versions/fd2a890159e9__alembic_sync.py @@ -0,0 +1,354 @@ +"""_alembic_sync + +Revision ID: fd2a890159e9 +Revises: f6cd4ba7dd8d +Create Date: 2024-02-01 15:51:14.325249 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'fd2a890159e9' +down_revision = 'f6cd4ba7dd8d' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('user') + op.drop_table('notification') + op.drop_table('worksheet_query_result') + op.drop_table('item_tags') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('dataset_table_profiling_job') + op.drop_table('dashboardshare') + op.drop_table('datapipeline') + op.drop_table('dashboard') + op.drop_table('vote') + op.drop_table('term_link') + op.drop_table('group_member') + op.drop_table('datapipelineenvironments') + op.drop_table('redshiftcluster_dataset') + op.drop_table('tag') + op.drop_table('redshiftcluster') + op.drop_table('glossary_node') + op.drop_table('dataset_quality_rule') + op.drop_table('feed_message') + op.drop_table('worksheet') + op.alter_column('dataset_bucket', 'partition', + existing_type=sa.VARCHAR(), + nullable=True) + op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') + op.alter_column('environment_parameters', 'paramValue', + existing_type=sa.VARCHAR(), + nullable=True) + op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) + op.alter_column('sagemaker_studio_domain', 'environmentUri', + existing_type=sa.VARCHAR(), + nullable=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('sagemaker_studio_domain', 'environmentUri', + existing_type=sa.VARCHAR(), + nullable=False) + op.drop_constraint(None, 'environment_parameters', type_='foreignkey') + op.alter_column('environment_parameters', 'paramValue', + existing_type=sa.VARCHAR(), + nullable=False) + op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) + op.alter_column('dataset_bucket', 'partition', + existing_type=sa.VARCHAR(), + nullable=False) + op.create_table('worksheet', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlAdminGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('chartConfig', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.Column('lastSavedAthenaQueryIdForQuery', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('lastSavedAthenaQueryIdForChart', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('worksheetUri', name='worksheet_pkey') + ) + op.create_table('feed_message', + sa.Column('feedMessageUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('creator', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=False), + sa.Column('content', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('feedMessageUri', name='feed_message_pkey') + ) + op.create_table('dataset_quality_rule', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + ) + op.create_table('glossary_node', + sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('path', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('readme', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('admin', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('nodeUri', name='glossary_node_pkey') + ) + op.create_table('redshiftcluster', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, 
nullable=True), + sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_redshiftcluster_env_uri'), + sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') + ) + op.create_table('tag', + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', 
sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) + op.create_table('redshiftcluster_dataset', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) + op.create_table('datapipelineenvironments', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('envPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('environmentLabel', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('pipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('pipelineLabel', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('stage', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('order', sa.INTEGER(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('samlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('envPipelineUri', name='datapipelineenvironments_pkey') + ) + op.create_table('group_member', + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + ) + op.create_table('term_link', + sa.Column('linkUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('approvedBySteward', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('approvedByOwner', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('linkUri', name='term_link_pkey') + ) + op.create_table('vote', + sa.Column('voteUri', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('username', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('upvote', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('voteUri', name='vote_pkey') + ) + op.create_table('dashboard', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('namespace', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('DashboardId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_dashboard_env_uri'), + sa.PrimaryKeyConstraint('dashboardUri', 
name='dashboard_pkey') + ) + op.create_table('datapipeline', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('DataPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('repo', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('devStrategy', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('template', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_datapipeline_env_uri'), + sa.PrimaryKeyConstraint('DataPipelineUri', name='sqlpipeline_pkey') + ) + op.create_table('dashboardshare', + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'dashboardUri', 
name='dashboardshare_pkey') + ) + op.create_table('dataset_table_profiling_job', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') + ) + op.create_table('redshiftcluster_datasettable', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, 
nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', name='redshiftcluster_datasettable_pkey') + ) + op.create_table('item_tags', + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) + op.create_table('worksheet_query_result', + sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AthenaQueryId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('queryType', postgresql.ENUM('chart', 'data', name='querytype'), autoincrement=False, nullable=False), + sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('OutputLocation', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('error', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('ElapsedTimeInMs', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('DataScannedInBytes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('AthenaQueryId', name='worksheet_query_result_pkey') + ) + op.create_table('notification', + sa.Column('notificationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('type', sa.VARCHAR(length=100), 
autoincrement=False, nullable=True), + sa.Column('message', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('recipient', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('is_read', sa.BOOLEAN(), autoincrement=False, nullable=False), + sa.Column('target_uri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('notificationUri', name='notification_pkey') + ) + op.create_table('user', + sa.Column('userId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('userId', name='user_pkey') + ) + # ### end Alembic commands ### From d01aeac0bfa7d8bac87e265508b5ebb6cab88c8d Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 1 Feb 2024 16:13:22 +0000 Subject: [PATCH 02/17] alembic README now in md --- backend/migrations/{README => README.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename backend/migrations/{README => README.md} (82%) diff --git a/backend/migrations/README b/backend/migrations/README.md similarity index 82% rename from backend/migrations/README rename to backend/migrations/README.md index 1bcc2ce5f..f24fdfb34 100644 --- a/backend/migrations/README +++ b/backend/migrations/README.md @@ -3,7 +3,7 @@ To Generate alembic migration during development: ``` export PYTHONPATH=backend export envname=local -alembic -c backend/alembic.ini revision -m "_release_vX.X.X" +alembic -c backend/alembic.ini revision -m "_release_vX.X.X" --autogenerate ``` To run the upgrade (this is part of the deployment pipeline) From 7aa8658aeaa76902b2bb65fe9dd8a7e865e72c4c Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 1 Feb 2024 16:15:46 +0000 Subject: [PATCH 
03/17] lint fix --- .../versions/fd2a890159e9__alembic_sync.py | 562 +++++++++--------- 1 file changed, 284 insertions(+), 278 deletions(-) diff --git a/backend/migrations/versions/fd2a890159e9__alembic_sync.py b/backend/migrations/versions/fd2a890159e9__alembic_sync.py index 9b1ee45be..2a2658e7a 100644 --- a/backend/migrations/versions/fd2a890159e9__alembic_sync.py +++ b/backend/migrations/versions/fd2a890159e9__alembic_sync.py @@ -39,316 +39,322 @@ def upgrade(): op.drop_table('feed_message') op.drop_table('worksheet') op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.drop_constraint(None, 'environment_parameters', type_='foreignkey') op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.create_table('worksheet', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlAdminGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('chartConfig', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.Column('lastSavedAthenaQueryIdForQuery', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('lastSavedAthenaQueryIdForChart', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('worksheetUri', name='worksheet_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlAdminGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('chartConfig', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, + nullable=True), + sa.Column('lastSavedAthenaQueryIdForQuery', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('lastSavedAthenaQueryIdForChart', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('worksheetUri', name='worksheet_pkey') + ) op.create_table('feed_message', - sa.Column('feedMessageUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('creator', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), - sa.Column('content', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('feedMessageUri', name='feed_message_pkey') - ) + sa.Column('feedMessageUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('creator', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), + 
sa.Column('content', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('feedMessageUri', name='feed_message_pkey') + ) op.create_table('dataset_quality_rule', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + 
sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + ) op.create_table('glossary_node', - sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('path', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('readme', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('admin', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('nodeUri', name='glossary_node_pkey') - ) + sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('path', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('label', sa.VARCHAR(), 
autoincrement=False, nullable=False), + sa.Column('readme', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('admin', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('nodeUri', name='glossary_node_pkey') + ) op.create_table('redshiftcluster', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', 
sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_redshiftcluster_env_uri'), - sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, 
nullable=True), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], + name='fk_redshiftcluster_env_uri'), + sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') + ) op.create_table('tag', - sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) op.create_table('redshiftcluster_dataset', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') - ) + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) op.create_table('datapipelineenvironments', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('envPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('environmentLabel', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('pipelineUri', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('pipelineLabel', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('stage', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('order', sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('samlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('envPipelineUri', name='datapipelineenvironments_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('envPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('environmentLabel', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('pipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('pipelineLabel', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('stage', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('order', sa.INTEGER(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('samlGroupName', sa.VARCHAR(), 
autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('envPipelineUri', name='datapipelineenvironments_pkey') + ) op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') - ) + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + ) op.create_table('term_link', - sa.Column('linkUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('approvedBySteward', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('approvedByOwner', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), 
autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('linkUri', name='term_link_pkey') - ) + sa.Column('linkUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('approvedBySteward', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('approvedByOwner', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('linkUri', name='term_link_pkey') + ) op.create_table('vote', - sa.Column('voteUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('username', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('upvote', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('voteUri', name='vote_pkey') - ) + sa.Column('voteUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('username', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('upvote', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('voteUri', name='vote_pkey') + ) op.create_table('dashboard', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('namespace', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('DashboardId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_dashboard_env_uri'), - sa.PrimaryKeyConstraint('dashboardUri', name='dashboard_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('namespace', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('DashboardId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], + name='fk_dashboard_env_uri'), + sa.PrimaryKeyConstraint('dashboardUri', name='dashboard_pkey') + ) op.create_table('datapipeline', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - 
sa.Column('DataPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('repo', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('devStrategy', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('template', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_datapipeline_env_uri'), - sa.PrimaryKeyConstraint('DataPipelineUri', name='sqlpipeline_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('DataPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('repo', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('devStrategy', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('template', sa.VARCHAR(), autoincrement=False, nullable=True), + 
sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], + name='fk_datapipeline_env_uri'), + sa.PrimaryKeyConstraint('DataPipelineUri', name='sqlpipeline_pkey') + ) op.create_table('dashboardshare', - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('shareUri', 'dashboardUri', name='dashboardshare_pkey') - ) + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'dashboardUri', name='dashboardshare_pkey') + ) op.create_table('dataset_table_profiling_job', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), - 
sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') + ) op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', name='redshiftcluster_datasettable_pkey') - ) + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', + name='redshiftcluster_datasettable_pkey') + ) op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), 
- sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') - ) + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) op.create_table('worksheet_query_result', - sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AthenaQueryId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('queryType', postgresql.ENUM('chart', 'data', name='querytype'), autoincrement=False, nullable=False), - sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('OutputLocation', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('error', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('ElapsedTimeInMs', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('DataScannedInBytes', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('AthenaQueryId', name='worksheet_query_result_pkey') - ) + sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AthenaQueryId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('queryType', postgresql.ENUM('chart', 'data', name='querytype'), autoincrement=False, + nullable=False), + sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('OutputLocation', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('error', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('ElapsedTimeInMs', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('DataScannedInBytes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('AthenaQueryId', name='worksheet_query_result_pkey') + ) op.create_table('notification', - sa.Column('notificationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('type', sa.VARCHAR(length=100), autoincrement=False, nullable=True), - sa.Column('message', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('recipient', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('is_read', sa.BOOLEAN(), autoincrement=False, nullable=False), - sa.Column('target_uri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('notificationUri', name='notification_pkey') - ) + sa.Column('notificationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('type', sa.VARCHAR(length=100), autoincrement=False, nullable=True), + sa.Column('message', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('recipient', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('is_read', sa.BOOLEAN(), autoincrement=False, nullable=False), + sa.Column('target_uri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), 
autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('notificationUri', name='notification_pkey') + ) op.create_table('user', - sa.Column('userId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('userId', name='user_pkey') - ) + sa.Column('userId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('userId', name='user_pkey') + ) # ### end Alembic commands ### From 5438acb121215658a4e56986bbf49d52fafd0340 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Mon, 5 Feb 2024 11:34:08 +0000 Subject: [PATCH 04/17] sync local db and migrations add missing models to env.py ignore user table as it's for local purposes --- backend/alembic.ini | 3 + backend/migrations/env.py | 22 +- .../a375771e4d0f__alembic_sync_state.py | 183 +++++++++ .../versions/fd2a890159e9__alembic_sync.py | 360 ------------------ 4 files changed, 207 insertions(+), 361 deletions(-) create mode 100644 backend/migrations/versions/a375771e4d0f__alembic_sync_state.py delete mode 100644 backend/migrations/versions/fd2a890159e9__alembic_sync.py diff --git a/backend/alembic.ini b/backend/alembic.ini index 3bc9ee960..4c8e7c72b 100644 --- a/backend/alembic.ini +++ b/backend/alembic.ini @@ -43,3 +43,6 @@ formatter = generic [formatter_generic] format = %(levelname)-5.5s [%(name)s] %(message)s datefmt = %H:%M:%S + +[alembic:exclude] +tables = user \ No newline at end of file diff --git a/backend/migrations/env.py b/backend/migrations/env.py index 277278357..23e32ec33 100644 --- a/backend/migrations/env.py +++ b/backend/migrations/env.py @@ -1,6 +1,18 @@ from __future__ import with_statement from alembic import context from logging.config import fileConfig +import re + +# import additional models here + +from 
dataall.modules.catalog.db.glossary_models import GlossaryNode, TermLink +from dataall.modules.dashboards.db.dashboard_models import DashboardShare, Dashboard +from dataall.modules.datapipelines import DataPipeline +from dataall.modules.datapipelines.db.datapipelines_models import DataPipelineEnvironment +from dataall.modules.feed.db.feed_models import FeedMessage +from dataall.modules.notifications.db.notification_models import Notification +from dataall.modules.vote.db.vote_models import Vote +from dataall.modules.worksheets.db.worksheet_models import WorksheetQueryResult, Worksheet # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -25,6 +37,13 @@ # ... etc. +exclude_tables = config.get_section('alembic:exclude').get('tables', '').split(',') + + +def include_object(object, name, type_, *args, **kwargs): + return not (type_ == 'table' and name in exclude_tables) + + def run_migrations_offline(): """Run migrations in 'offline' mode. 
@@ -47,6 +66,7 @@ def run_migrations_offline(): target_metadata=target_metadata, version_table_schema=ENVNAME, literal_binds=True, + include_object=include_object ) with context.begin_transaction(): @@ -62,7 +82,7 @@ def run_migrations_online(): """ with get_engine(ENVNAME).engine.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure(connection=connection, target_metadata=target_metadata, include_object=include_object) with context.begin_transaction(): context.run_migrations() diff --git a/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py b/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py new file mode 100644 index 000000000..30ac7c2eb --- /dev/null +++ b/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py @@ -0,0 +1,183 @@ +"""_alembic_sync_state + +Revision ID: a375771e4d0f +Revises: f6cd4ba7dd8d +Create Date: 2024-02-05 11:30:56.177755 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'a375771e4d0f' +down_revision = 'f6cd4ba7dd8d' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('item_tags') + op.drop_table('dataset_table_profiling_job') + op.drop_table('redshiftcluster') + op.drop_table('dataset_quality_rule') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('redshiftcluster_dataset') + op.drop_table('tag') + op.drop_table('group_member') + op.alter_column('datapipelineenvironments', 'region', + existing_type=sa.VARCHAR(), + nullable=True) + op.alter_column('dataset_bucket', 'partition', + existing_type=sa.VARCHAR(), + nullable=True) + op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') + op.alter_column('environment_parameters', 'paramValue', + existing_type=sa.VARCHAR(), + nullable=True) + op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) + op.alter_column('sagemaker_studio_domain', 'environmentUri', + existing_type=sa.VARCHAR(), + nullable=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('sagemaker_studio_domain', 'environmentUri', + existing_type=sa.VARCHAR(), + nullable=False) + op.drop_constraint(None, 'environment_parameters', type_='foreignkey') + op.alter_column('environment_parameters', 'paramValue', + existing_type=sa.VARCHAR(), + nullable=False) + op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) + op.alter_column('dataset_bucket', 'partition', + existing_type=sa.VARCHAR(), + nullable=False) + op.alter_column('datapipelineenvironments', 'region', + existing_type=sa.VARCHAR(), + nullable=False) + op.create_table('group_member', + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + ) + op.create_table('tag', + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) + op.create_table('redshiftcluster_dataset', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) + op.create_table('redshiftcluster_datasettable', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', name='redshiftcluster_datasettable_pkey') + ) + op.create_table('dataset_quality_rule', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('datasetUri', sa.VARCHAR(), 
autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + ) + op.create_table('redshiftcluster', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), + 
sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_redshiftcluster_env_uri'), + sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') + ) + op.create_table('dataset_table_profiling_job', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') + ) + op.create_table('item_tags', + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) + # ### end Alembic commands ### diff --git a/backend/migrations/versions/fd2a890159e9__alembic_sync.py b/backend/migrations/versions/fd2a890159e9__alembic_sync.py deleted file mode 100644 index 2a2658e7a..000000000 --- a/backend/migrations/versions/fd2a890159e9__alembic_sync.py +++ /dev/null @@ -1,360 +0,0 @@ -"""_alembic_sync - -Revision ID: fd2a890159e9 -Revises: f6cd4ba7dd8d -Create Date: 2024-02-01 15:51:14.325249 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision = 'fd2a890159e9' -down_revision = 'f6cd4ba7dd8d' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('user') - op.drop_table('notification') - op.drop_table('worksheet_query_result') - op.drop_table('item_tags') - op.drop_table('redshiftcluster_datasettable') - op.drop_table('dataset_table_profiling_job') - op.drop_table('dashboardshare') - op.drop_table('datapipeline') - op.drop_table('dashboard') - op.drop_table('vote') - op.drop_table('term_link') - op.drop_table('group_member') - op.drop_table('datapipelineenvironments') - op.drop_table('redshiftcluster_dataset') - op.drop_table('tag') - op.drop_table('redshiftcluster') - op.drop_table('glossary_node') - op.drop_table('dataset_quality_rule') - op.drop_table('feed_message') - op.drop_table('worksheet') - op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=True) - op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') - op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=True) - op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) - op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=True) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=False) - op.drop_constraint(None, 'environment_parameters', type_='foreignkey') - op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=False) - op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) - op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=False) - op.create_table('worksheet', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlAdminGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('chartConfig', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, - nullable=True), - sa.Column('lastSavedAthenaQueryIdForQuery', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('lastSavedAthenaQueryIdForChart', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('worksheetUri', name='worksheet_pkey') - ) - op.create_table('feed_message', - sa.Column('feedMessageUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('creator', sa.VARCHAR(), autoincrement=False, nullable=False), - 
sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), - sa.Column('content', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('feedMessageUri', name='feed_message_pkey') - ) - op.create_table('dataset_quality_rule', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') - ) - op.create_table('glossary_node', - sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('path', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('label', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('readme', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('admin', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('nodeUri', name='glossary_node_pkey') - ) - op.create_table('redshiftcluster', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', sa.VARCHAR(), 
autoincrement=False, nullable=True), - sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], - name='fk_redshiftcluster_env_uri'), - sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') - ) - op.create_table('tag', - sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), 
- sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) - op.create_table('redshiftcluster_dataset', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') - ) - op.create_table('datapipelineenvironments', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('envPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('environmentLabel', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('pipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('pipelineLabel', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('stage', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('order', sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('samlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('envPipelineUri', name='datapipelineenvironments_pkey') - ) - op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') - ) - op.create_table('term_link', - sa.Column('linkUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('nodeUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('approvedBySteward', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('approvedByOwner', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('linkUri', name='term_link_pkey') - ) - op.create_table('vote', - sa.Column('voteUri', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('username', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('targetType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('upvote', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('voteUri', name='vote_pkey') - ) - op.create_table('dashboard', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('namespace', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('DashboardId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], - name='fk_dashboard_env_uri'), - 
sa.PrimaryKeyConstraint('dashboardUri', name='dashboard_pkey') - ) - op.create_table('datapipeline', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('DataPipelineUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('repo', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('devStrategy', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('template', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], - name='fk_datapipeline_env_uri'), - sa.PrimaryKeyConstraint('DataPipelineUri', name='sqlpipeline_pkey') - ) - op.create_table('dashboardshare', - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('dashboardUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - 
sa.PrimaryKeyConstraint('shareUri', 'dashboardUri', name='dashboardshare_pkey') - ) - op.create_table('dataset_table_profiling_job', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') - ) - op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - 
sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', - name='redshiftcluster_datasettable_pkey') - ) - op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') - ) - op.create_table('worksheet_query_result', - sa.Column('worksheetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AthenaQueryId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('queryType', postgresql.ENUM('chart', 'data', name='querytype'), autoincrement=False, - nullable=False), - sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('OutputLocation', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('error', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('ElapsedTimeInMs', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('DataScannedInBytes', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('AthenaQueryId', name='worksheet_query_result_pkey') - ) - op.create_table('notification', - sa.Column('notificationUri', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('type', sa.VARCHAR(length=100), autoincrement=False, nullable=True), - sa.Column('message', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('recipient', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('is_read', sa.BOOLEAN(), autoincrement=False, nullable=False), - sa.Column('target_uri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('notificationUri', name='notification_pkey') - ) - op.create_table('user', - sa.Column('userId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('userId', name='user_pkey') - ) - # ### end Alembic commands ### From 1c67877256bdec7d1fe673c9913fc62140d96b06 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Mon, 5 Feb 2024 16:02:09 +0000 Subject: [PATCH 05/17] Readme about migrations and make command for autogeneration. 
--- Makefile | 7 ++++ backend/migrations/README.md | 81 ++++++++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index fed73e18c..e1fc5a06a 100644 --- a/Makefile +++ b/Makefile @@ -85,6 +85,13 @@ upgrade-db: upgrade-pip install-backend export PYTHONPATH=./backend && \ alembic -c backend/alembic.ini upgrade head +generate-migrations: upgrade-pip install-backend + pip install 'alembic' + export PYTHONPATH=./backend && \ + alembic -c backend/alembic.ini upgrade head + alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" --autogenerate + + version-major: pip install bump2version git config --global user.email git-cicd@codecommit.com diff --git a/backend/migrations/README.md b/backend/migrations/README.md index f24fdfb34..08360492c 100644 --- a/backend/migrations/README.md +++ b/backend/migrations/README.md @@ -1,19 +1,84 @@ -To Generate alembic migration during development: +# Managing DB with Alembic locally +When we run ```docker-compose up``` the postgres container is created with no tables or schemas. + +Upon start of GraphQL container, sqlalchemy ```declarative_base``` is used to create all tables with this function: +```Base.metadata.create_all(engine.engine)```. **The number of tables depends on the modules that are enabled in ```config.json``` (in the root of the project).** + +As the database is created from scratch, it has no current information about migration state, so, first we need to run database upgrade. +After that alembic will be able to generate the further migrations localy. + +To upgrade database without generating migrations use +```bash +make upgrade-db ``` +or +```bash export PYTHONPATH=backend export envname=local -alembic -c backend/alembic.ini revision -m "_release_vX.X.X" --autogenerate +alembic -c backend/alembic.ini upgrade head ``` +This command will apply all migrations, and syncronize the DB state with local list of migraitions. 
+ +To upgrade database and generate alembic migration during development use: -To run the upgrade (this is part of the deployment pipeline) ```bash -alembic -c backend/alembic.ini upgrade head +make generate-migrations ``` - -To run migrations locally +or ```bash -envname=local -alembic revision upgrade head +export PYTHONPATH=backend +export envname=local +alembic -c backend/alembic.ini upgrade head +alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" --autogenerate ``` +Please, change the auto-generated filename postfix with the short description of the migration purpose. + +## What to know about autogenerated migrations: + +**Autogenerate will detect:** + + - Table additions, removals. + - Column additions, removals. + - Change of nullable status on columns. + - Basic changes in indexes and explicitly-named unique constraints + - Basic changes in foreign key constraints + +**Autogenerate can optionally detect:** + + - Change of column type. This will occur by default unless the parameter `EnvironmentContext.configure.compare_type` is set to `False`. The default implementation will reliably detect major changes, such as between `Numeric` and `String`, as well as accommodate for the types generated by SQLAlchemy’s “generic” types such as `Boolean`. Arguments that are shared between both types, such as length and precision values, will also be compared. If either the metadata type or database type has additional arguments beyond that of the other type, these are not compared, such as if one numeric type featured a “scale” and other type did not, this would be seen as the backing database not supporting the value, or reporting on a default that the metadata did not specify. + +The type comparison logic is fully extensible as well; see [Comparing Types](https://alembic.sqlalchemy.org/en/latest/autogenerate.html#compare-types) for details. + + - Change of server default. 
This will occur if you set the `EnvironmentContext.configure.compare_server_default` parameter to `True`, or to a custom callable function. This feature works well for simple cases but cannot always produce accurate results. The Postgresql backend will actually invoke the “detected” and “metadata” values against the database to determine equivalence. The feature is off by default so that it can be tested on the target schema first. Like type comparison, it can also be customized by passing a callable; see the function’s documentation for details. + +**Autogenerate can not detect:** + +- Changes of table name. These will come out as an add/drop of two different tables, and should be hand-edited into a name change instead. + In this case you should remove the automatically generated scripts and replace them with the following code (e.g. renaming table 'marathon' to 'snickers'): + ```python + def upgrade(): + op.rename_table('marathon', 'snickers') + op.execute('ALTER SEQUENCE marathon_id_seq RENAME TO snickers_id_seq') # don't forget to rename all related entities + op.execute('ALTER INDEX marathon_pkey RENAME TO snickers_pkey') + + def downgrade(): + op.rename_table('snickers', 'marathon') + op.execute('ALTER SEQUENCE snickers_id_seq RENAME TO marathon_id_seq') + op.execute('ALTER INDEX snickers_pkey RENAME TO marathon_pkey') + ``` +- Changes of column name. Like table name changes, these are detected as a column add/drop pair, which is not at all the same as a name change. + To keep all data in the column add this script to the upgrade function (and don't forget to add the inverse in the downgrade function) + ```python + with op.batch_alter_table('my_table', schema=None) as batch_op: batch_op.alter_column('old_col_name', new_column_name='new_col_name') + ``` +- Anonymously named constraints. Give your constraints a name, e.g. `UniqueConstraint('col1', 'col2', name="my_name")`. 
See the section [The Importance of Naming Constraints](https://alembic.sqlalchemy.org/en/latest/naming.html) for background on how to configure automatic naming schemes for constraints. +- Special SQLAlchemy types such as Enum when generated on a backend which doesn’t support ENUM directly - this is because the representation of such a type in the non-supporting database, i.e. a CHAR+CHECK constraint, could be any kind of CHAR+CHECK. For SQLAlchemy to determine that this is actually an ENUM would only be a guess, something that’s generally a bad idea. To implement your own “guessing” function here, use the `sqlalchemy.events.DDLEvents.column_reflect()` event to detect when a CHAR (or whatever the target type is) is reflected, and change it to an ENUM (or whatever type is desired) if it is known that that’s the intent of the type. The `sqlalchemy.events.DDLEvents.after_parent_attach()` event can be used within the autogenerate process to intercept and un-attach unwanted CHECK constraints. + As an example of handling Enums, please refer to `ConfidentialityClassification` in migration `97050ec09354_release_3_7_8.py`. + + +**Autogenerate can’t currently, but will eventually detect:** +- Some free-standing constraint additions and removals may not be supported, including PRIMARY KEY, EXCLUDE, CHECK; these are not necessarily implemented within the autogenerate detection system and also may not be supported by the supporting SQLAlchemy dialect. +- Sequence additions, removals - not yet implemented. 
+ https://alembic.sqlalchemy.org/en/latest/ \ No newline at end of file From d9ae63293276fefd3ab2c8800f079ed0e56f44cd Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Mon, 5 Feb 2024 16:10:42 +0000 Subject: [PATCH 06/17] Another set of comments and lint-improvements --- backend/migrations/env.py | 3 + .../a375771e4d0f__alembic_sync_state.py | 260 +++++++++--------- 2 files changed, 134 insertions(+), 129 deletions(-) diff --git a/backend/migrations/env.py b/backend/migrations/env.py index 23e32ec33..98e13a507 100644 --- a/backend/migrations/env.py +++ b/backend/migrations/env.py @@ -3,7 +3,10 @@ from logging.config import fileConfig import re + +# DO NOT DELETE # import additional models here +# they are not used directly in env.py, but these imports are important for alembic from dataall.modules.catalog.db.glossary_models import GlossaryNode, TermLink from dataall.modules.dashboards.db.dashboard_models import DashboardShare, Dashboard diff --git a/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py b/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py index 30ac7c2eb..9263b3180 100644 --- a/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py +++ b/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py @@ -27,157 +27,159 @@ def upgrade(): op.drop_table('tag') op.drop_table('group_member') op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) 
op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.drop_constraint(None, 'environment_parameters', type_='foreignkey') op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') - ) + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + ) op.create_table('tag', - sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) op.create_table('redshiftcluster_dataset', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') - ) + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + 
sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', name='redshiftcluster_datasettable_pkey') - ) + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + 
sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', + name='redshiftcluster_datasettable_pkey') + ) op.create_table('dataset_quality_rule', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + 
sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + ) op.create_table('redshiftcluster', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('numberOfNodes', sa.INTEGER(), 
autoincrement=False, nullable=True), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_redshiftcluster_env_uri'), - sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), 
+ sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], + name='fk_redshiftcluster_env_uri'), + sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') + ) op.create_table('dataset_table_profiling_job', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), - 
sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') + ) op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') - ) + sa.Column('tagid', 
sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) # ### end Alembic commands ### From 3f19bbf69095efda8d21c52bec236b9235ba0b6b Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 8 Feb 2024 13:06:09 +0000 Subject: [PATCH 07/17] Changes requested by PR-reviewers --- .../datasets_base/db/dataset_models.py | 4 +- backend/migrations/README.md | 43 +++- .../a375771e4d0f__alembic_sync_state.py | 185 ------------------ .../versions/c72d8c5272a8__alembic_sync.py | 175 +++++++++++++++++ 4 files changed, 212 insertions(+), 195 deletions(-) delete mode 100644 backend/migrations/versions/a375771e4d0f__alembic_sync_state.py create mode 100644 backend/migrations/versions/c72d8c5272a8__alembic_sync.py diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets_base/db/dataset_models.py index f3bf9f204..c8e8ca464 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_models.py +++ b/backend/dataall/modules/datasets_base/db/dataset_models.py @@ -148,12 +148,12 @@ def uri(cls): class DatasetBucket(Resource, Base): __tablename__ = 'dataset_bucket' - datasetUri = Column(String, nullable=False) + datasetUri = Column(String, ForeignKey("dataset.datasetUri"), nullable=False) bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket')) AwsAccountId = Column(String, nullable=False) S3BucketName = Column(String, nullable=False) region = Column(String, default='eu-west-1') - partition = Column(String, default='aws') + partition = Column(String, default='aws', nullable=False) KmsAlias = Column(String, nullable=False) imported = Column(Boolean, default=False) importedKmsKey = Column(Boolean, default=False) diff --git a/backend/migrations/README.md b/backend/migrations/README.md index 08360492c..cdc7b9420 100644 --- a/backend/migrations/README.md +++ 
b/backend/migrations/README.md @@ -1,24 +1,53 @@ # Managing DB with Alembic locally +In data.all we use [Alembic](https://alembic.sqlalchemy.org/en/latest/) -- a lightweight database migration tool for usage with the SQLAlchemy Database Toolkit for Python. +Alembic relies on the database's current state to generate and apply migrations accurately. +Alembic determines the changes to be made by comparing the current state of the database with the desired state specified in your SQLAlchemy models. This process, known as schema diffing, requires an existing database to identify differences. +Alembic generates migration scripts based on the detected differences between the current database schema and the desired schema defined in your application code. This generation is dependent on the actual structure and content of the database. -When we run ```docker-compose up``` the postgres container is created with no tables or schemas. +In order to create and test migrations locally you will have to create a local database. + +## Prerequisites + +1. Build and launch Docker containers for the database and GraphQL. +```bash +docker compose build db +docker compose run db +docker compose build graphql +docker compose run graphql +``` +These can also be initiated alongside all local testing containers using the following command: +```bash +docker compose up +``` +2. Specify the location of database model descriptions. If you are at the project's root, the target folder path is backend. +```bash +export PYTHONPATH=backend +``` +3. The containers initiated in the first step will default to using the schema named `local`. Alembic relies on the environmental variable `envname` to determine the schema. Set it to `local` with the following command: +```bash +export envname=local +``` +In a real-life RDS database, `envname` adopts the value of the environment (e.g., dev, test, etc.). 
+ + +## Managing migrations + +When we run ```docker compose build``` the postgres container is created with no tables or schemas. Upon start of GraphQL container, sqlalchemy ```declarative_base``` is used to create all tables with this function: ```Base.metadata.create_all(engine.engine)```. **The number of tables depends on the modules that are enabled in ```config.json``` (in the root of the project).** As the database is created from scratch, it has no current information about migration state, so, first we need to run database upgrade. -After that alembic will be able to generate the further migrations localy. +After that alembic will be able to generate the further migrations locally. -To upgrade database without generating migrations use +This command will apply all migrations, and synchronize the DB state with local alembic history of migrations. ```bash make upgrade-db ``` or ```bash -export PYTHONPATH=backend -export envname=local alembic -c backend/alembic.ini upgrade head ``` -This command will apply all migrations, and syncronize the DB state with local list of migraitions. To upgrade database and generate alembic migration during development use: @@ -27,8 +56,6 @@ make generate-migrations ``` or ```bash -export PYTHONPATH=backend -export envname=local alembic -c backend/alembic.ini upgrade head alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" --autogenerate ``` diff --git a/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py b/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py deleted file mode 100644 index 9263b3180..000000000 --- a/backend/migrations/versions/a375771e4d0f__alembic_sync_state.py +++ /dev/null @@ -1,185 +0,0 @@ -"""_alembic_sync_state - -Revision ID: a375771e4d0f -Revises: f6cd4ba7dd8d -Create Date: 2024-02-05 11:30:56.177755 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision = 'a375771e4d0f' -down_revision = 'f6cd4ba7dd8d' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('item_tags') - op.drop_table('dataset_table_profiling_job') - op.drop_table('redshiftcluster') - op.drop_table('dataset_quality_rule') - op.drop_table('redshiftcluster_datasettable') - op.drop_table('redshiftcluster_dataset') - op.drop_table('tag') - op.drop_table('group_member') - op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=True) - op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=True) - op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') - op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=True) - op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) - op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=True) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=False) - op.drop_constraint(None, 'environment_parameters', type_='foreignkey') - op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=False) - op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) - op.alter_column('dataset_bucket', 'partition', - existing_type=sa.VARCHAR(), - nullable=False) - op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=False) - op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') - ) - op.create_table('tag', - sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) - op.create_table('redshiftcluster_dataset', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') - ) - op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', - name='redshiftcluster_datasettable_pkey') - ) - op.create_table('dataset_quality_rule', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('datasetUri', sa.VARCHAR(), 
autoincrement=False, nullable=False), - sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') - ) - op.create_table('redshiftcluster', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), - 
sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], - name='fk_redshiftcluster_env_uri'), - sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') - ) - op.create_table('dataset_table_profiling_job', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - 
sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') - ) - op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') - ) - # ### end Alembic commands ### diff --git a/backend/migrations/versions/c72d8c5272a8__alembic_sync.py b/backend/migrations/versions/c72d8c5272a8__alembic_sync.py new file mode 100644 index 000000000..8a1bf09d6 --- /dev/null +++ b/backend/migrations/versions/c72d8c5272a8__alembic_sync.py @@ -0,0 +1,175 @@ +"""_describe_changes_shortly + +Revision ID: c72d8c5272a8 +Revises: f6cd4ba7dd8d +Create Date: 2024-02-08 12:35:45.016427 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = 'c72d8c5272a8' +down_revision = 'f6cd4ba7dd8d' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('item_tags') + op.drop_table('tag') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('redshiftcluster') + op.drop_table('dataset_table_profiling_job') + op.drop_table('dataset_quality_rule') + op.drop_table('redshiftcluster_dataset') + op.drop_table('group_member') + op.alter_column('datapipelineenvironments', 'region', + existing_type=sa.VARCHAR(), + nullable=True) + op.alter_column('environment_parameters', 'paramValue', + existing_type=sa.VARCHAR(), + nullable=True) + op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) + op.alter_column('sagemaker_studio_domain', 'environmentUri', + existing_type=sa.VARCHAR(), + nullable=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('sagemaker_studio_domain', 'environmentUri', + existing_type=sa.VARCHAR(), + nullable=False) + op.drop_constraint(None, 'environment_parameters', type_='foreignkey') + op.alter_column('environment_parameters', 'paramValue', + existing_type=sa.VARCHAR(), + nullable=False) + op.alter_column('datapipelineenvironments', 'region', + existing_type=sa.VARCHAR(), + nullable=False) + op.create_table('group_member', + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + ) + op.create_table('redshiftcluster_dataset', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) + op.create_table('dataset_quality_rule', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + ) + op.create_table('dataset_table_profiling_job', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), 
autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') + ) + op.create_table('redshiftcluster', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', 
sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_redshiftcluster_env_uri'), + sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') + ) + op.create_table('redshiftcluster_datasettable', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', 
sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', name='redshiftcluster_datasettable_pkey') + ) + op.create_table('tag', + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) + op.create_table('item_tags', + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) + # ### end Alembic commands ### From 9b8382bc0e8be69b65e18c273a77a2ae9140e396 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 8 Feb 2024 13:08:42 +0000 Subject: [PATCH 08/17] lint fix --- .../versions/c72d8c5272a8__alembic_sync.py | 252 +++++++++--------- 1 file changed, 127 insertions(+), 125 deletions(-) diff --git a/backend/migrations/versions/c72d8c5272a8__alembic_sync.py b/backend/migrations/versions/c72d8c5272a8__alembic_sync.py index 8a1bf09d6..16027a4bb 100644 --- a/backend/migrations/versions/c72d8c5272a8__alembic_sync.py +++ b/backend/migrations/versions/c72d8c5272a8__alembic_sync.py @@ -27,149 +27,151 @@ def upgrade(): op.drop_table('redshiftcluster_dataset') op.drop_table('group_member') op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) 
op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=True) + existing_type=sa.VARCHAR(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.drop_constraint(None, 'environment_parameters', type_='foreignkey') op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=False) + existing_type=sa.VARCHAR(), + nullable=False) op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') - ) + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + ) op.create_table('redshiftcluster_dataset', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') - ) + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) op.create_table('dataset_quality_rule', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), 
autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + ) op.create_table('dataset_table_profiling_job', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', 
sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') + ) op.create_table('redshiftcluster', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), - 
sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], name='fk_redshiftcluster_env_uri'), - sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') - ) + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, 
nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('databaseUser', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterUsername', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('nodeType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('numberOfNodes', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('subnetIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('securityGroupIds', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackStatus', sa.VARCHAR(), 
autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoles', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('endpoint', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('port', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('datahubSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('masterSecret', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('external_schema_created', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint(['environmentUri'], ['environment.environmentUri'], + name='fk_redshiftcluster_env_uri'), + sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') + ) op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', name='redshiftcluster_datasettable_pkey') - ) + sa.Column('clusterUri', 
sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', + name='redshiftcluster_datasettable_pkey') + ) op.create_table('tag', - sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') - ) + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) # ### end Alembic commands ### From 9aaabe748a949ea2ef80324726725b084b944e19 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 8 Feb 2024 13:25:04 +0000 Subject: [PATCH 09/17] add delete-cascade behaviour on datasetUri foreign key for DatasetBucket model --- .../datasets_base/db/dataset_models.py | 2 +- ..._sync.py => 442dd8f223a9__alembic_sync.py} | 116 +++++++++--------- 2 files changed, 61 insertions(+), 57 deletions(-) rename backend/migrations/versions/{c72d8c5272a8__alembic_sync.py => 442dd8f223a9__alembic_sync.py} (96%) diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets_base/db/dataset_models.py index c8e8ca464..73edbd235 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_models.py +++ b/backend/dataall/modules/datasets_base/db/dataset_models.py @@ -148,7 +148,7 @@ def uri(cls): class DatasetBucket(Resource, Base): __tablename__ = 'dataset_bucket' - datasetUri = Column(String, ForeignKey("dataset.datasetUri"), nullable=False) + datasetUri = Column(String, ForeignKey("dataset.datasetUri", ondelete='CASCADE'), nullable=False) bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket')) AwsAccountId = Column(String, nullable=False) S3BucketName = Column(String, nullable=False) diff --git a/backend/migrations/versions/c72d8c5272a8__alembic_sync.py b/backend/migrations/versions/442dd8f223a9__alembic_sync.py similarity index 96% rename from backend/migrations/versions/c72d8c5272a8__alembic_sync.py rename to backend/migrations/versions/442dd8f223a9__alembic_sync.py index 16027a4bb..a9d38bd1d 100644 --- a/backend/migrations/versions/c72d8c5272a8__alembic_sync.py +++ b/backend/migrations/versions/442dd8f223a9__alembic_sync.py @@ -1,8 +1,8 @@ """_describe_changes_shortly -Revision ID: c72d8c5272a8 +Revision ID: 442dd8f223a9 Revises: 
f6cd4ba7dd8d -Create Date: 2024-02-08 12:35:45.016427 +Create Date: 2024-02-08 13:19:22.898697 """ from alembic import op @@ -10,7 +10,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = 'c72d8c5272a8' +revision = '442dd8f223a9' down_revision = 'f6cd4ba7dd8d' branch_labels = None depends_on = None @@ -18,17 +18,19 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('item_tags') - op.drop_table('tag') - op.drop_table('redshiftcluster_datasettable') - op.drop_table('redshiftcluster') op.drop_table('dataset_table_profiling_job') + op.drop_table('group_member') + op.drop_table('redshiftcluster') op.drop_table('dataset_quality_rule') + op.drop_table('tag') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('item_tags') op.drop_table('redshiftcluster_dataset') - op.drop_table('group_member') op.alter_column('datapipelineenvironments', 'region', existing_type=sa.VARCHAR(), nullable=True) + op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') + op.create_foreign_key(None, 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri'], ondelete='CASCADE') op.alter_column('environment_parameters', 'paramValue', existing_type=sa.VARCHAR(), nullable=True) @@ -48,26 +50,46 @@ def downgrade(): op.alter_column('environment_parameters', 'paramValue', existing_type=sa.VARCHAR(), nullable=False) + op.drop_constraint(None, 'dataset_bucket', type_='foreignkey') + op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) op.alter_column('datapipelineenvironments', 'region', existing_type=sa.VARCHAR(), nullable=False) - op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + op.create_table('redshiftcluster_dataset', + sa.Column('clusterUri', sa.VARCHAR(), 
autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') ) - op.create_table('redshiftcluster_dataset', + op.create_table('item_tags', + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) + op.create_table('redshiftcluster_datasettable', sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + 
sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', + name='redshiftcluster_datasettable_pkey') + ) + op.create_table('tag', + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') ) op.create_table('dataset_quality_rule', sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), @@ -85,25 +107,6 @@ def downgrade(): sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') ) - op.create_table('dataset_table_profiling_job', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueTableName', sa.VARCHAR(), 
autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') - ) op.create_table('redshiftcluster', sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), @@ -147,31 +150,32 @@ def downgrade(): name='fk_redshiftcluster_env_uri'), sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') ) - op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + op.create_table('group_member', + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', - name='redshiftcluster_datasettable_pkey') + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') ) - op.create_table('tag', - sa.Column('id', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + op.create_table('dataset_table_profiling_job', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) - op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('jobUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('RunCommandId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') ) # ### end Alembic commands ### From c65dff5b8bc9841ab03f7f0791246cd58858c479 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova 
Date: Tue, 13 Feb 2024 11:14:28 +0000 Subject: [PATCH 10/17] review notes --- .../core/environment/db/environment_models.py | 2 +- .../datapipelines/db/datapipelines_models.py | 2 +- .../modules/mlstudio/db/mlstudio_models.py | 2 +- backend/migrations/README.md | 2 +- ..._sync.py => 6c9a8afee4e4__alembic_sync.py} | 148 ++++++++---------- 5 files changed, 69 insertions(+), 87 deletions(-) rename backend/migrations/versions/{442dd8f223a9__alembic_sync.py => 6c9a8afee4e4__alembic_sync.py} (92%) diff --git a/backend/dataall/core/environment/db/environment_models.py b/backend/dataall/core/environment/db/environment_models.py index 9430111da..658e02a91 100644 --- a/backend/dataall/core/environment/db/environment_models.py +++ b/backend/dataall/core/environment/db/environment_models.py @@ -64,7 +64,7 @@ class EnvironmentParameter(Base): __tablename__ = 'environment_parameters' environmentUri = Column(String, ForeignKey("environment.environmentUri"), primary_key=True) key = Column('paramKey', String, primary_key=True) - value = Column('paramValue', String, nullable=True) + value = Column('paramValue', String, nullable=False) def __init__(self, env_uri, key, value): super().__init__() diff --git a/backend/dataall/modules/datapipelines/db/datapipelines_models.py b/backend/dataall/modules/datapipelines/db/datapipelines_models.py index 0942e4ca1..7305d33fb 100644 --- a/backend/dataall/modules/datapipelines/db/datapipelines_models.py +++ b/backend/dataall/modules/datapipelines/db/datapipelines_models.py @@ -29,6 +29,6 @@ class DataPipelineEnvironment(Base, Resource): pipelineLabel = Column(String, nullable=False) stage = Column(String, nullable=False) order = Column(Integer, nullable=False) - region = Column(String, default='eu-west-1') + region = Column(String, default='eu-west-1', nullable=False) AwsAccountId = Column(String, nullable=False) samlGroupName = Column(String, nullable=False) diff --git a/backend/dataall/modules/mlstudio/db/mlstudio_models.py 
b/backend/dataall/modules/mlstudio/db/mlstudio_models.py index a4c93a2fa..8f178bb5a 100644 --- a/backend/dataall/modules/mlstudio/db/mlstudio_models.py +++ b/backend/dataall/modules/mlstudio/db/mlstudio_models.py @@ -11,7 +11,7 @@ class SagemakerStudioDomain(Resource, Base): """Describes ORM model for sagemaker ML Studio domain""" __tablename__ = 'sagemaker_studio_domain' - environmentUri = Column(String, ForeignKey("environment.environmentUri")) + environmentUri = Column(String, ForeignKey("environment.environmentUri"), nullable=False) sagemakerStudioUri = Column( String, primary_key=True, default=utils.uuid('sagemakerstudio') ) diff --git a/backend/migrations/README.md b/backend/migrations/README.md index cdc7b9420..33a2baa4b 100644 --- a/backend/migrations/README.md +++ b/backend/migrations/README.md @@ -40,7 +40,7 @@ Upon start of GraphQL container, sqlalchemy ```declarative_base``` is used to cr As the database is created from scratch, it has no current information about migration state, so, first we need to run database upgrade. After that alembic will be able to generate the further migrations locally. -This command will apply all migrations, and syncronize the DB state with local alembic historyt of migrations. +This command will apply all migrations, and synchronize the DB state with local alembic history of migrations.
```bash make upgrade-db ``` diff --git a/backend/migrations/versions/442dd8f223a9__alembic_sync.py b/backend/migrations/versions/6c9a8afee4e4__alembic_sync.py similarity index 92% rename from backend/migrations/versions/442dd8f223a9__alembic_sync.py rename to backend/migrations/versions/6c9a8afee4e4__alembic_sync.py index a9d38bd1d..9bd4d8d28 100644 --- a/backend/migrations/versions/442dd8f223a9__alembic_sync.py +++ b/backend/migrations/versions/6c9a8afee4e4__alembic_sync.py @@ -1,8 +1,8 @@ """_describe_changes_shortly -Revision ID: 442dd8f223a9 +Revision ID: 6c9a8afee4e4 Revises: f6cd4ba7dd8d -Create Date: 2024-02-08 13:19:22.898697 +Create Date: 2024-02-13 11:09:39.387899 """ from alembic import op @@ -10,7 +10,7 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = '442dd8f223a9' +revision = '6c9a8afee4e4' down_revision = 'f6cd4ba7dd8d' branch_labels = None depends_on = None @@ -18,94 +18,33 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('dataset_table_profiling_job') - op.drop_table('group_member') - op.drop_table('redshiftcluster') + op.drop_table('redshiftcluster_datasettable') op.drop_table('dataset_quality_rule') op.drop_table('tag') - op.drop_table('redshiftcluster_datasettable') - op.drop_table('item_tags') op.drop_table('redshiftcluster_dataset') - op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=True) + op.drop_table('item_tags') + op.drop_table('dataset_table_profiling_job') + op.drop_table('redshiftcluster') + op.drop_table('group_member') op.drop_constraint('dataset_bucket_datasetUri_fkey', 'dataset_bucket', type_='foreignkey') op.create_foreign_key(None, 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri'], ondelete='CASCADE') - op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=True) op.create_foreign_key(None, 'environment_parameters', 'environment', ['environmentUri'], ['environmentUri']) - op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - op.alter_column('sagemaker_studio_domain', 'environmentUri', - existing_type=sa.VARCHAR(), - nullable=False) op.drop_constraint(None, 'environment_parameters', type_='foreignkey') - op.alter_column('environment_parameters', 'paramValue', - existing_type=sa.VARCHAR(), - nullable=False) op.drop_constraint(None, 'dataset_bucket', type_='foreignkey') op.create_foreign_key('dataset_bucket_datasetUri_fkey', 'dataset_bucket', 'dataset', ['datasetUri'], ['datasetUri']) - op.alter_column('datapipelineenvironments', 'region', - existing_type=sa.VARCHAR(), - nullable=False) - op.create_table('redshiftcluster_dataset', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') - ) - op.create_table('item_tags', - sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') - ) - op.create_table('redshiftcluster_datasettable', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('databaseName', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', - name='redshiftcluster_datasettable_pkey') - ) - op.create_table('tag', - sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) - op.create_table('dataset_quality_rule', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + op.create_table('group_member', + sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), 
autoincrement=False, nullable=False), - sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('ruleUri', name='dataset_quality_rule_pkey') + sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') ) op.create_table('redshiftcluster', sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), @@ -150,15 +89,6 @@ def downgrade(): name='fk_redshiftcluster_env_uri'), sa.PrimaryKeyConstraint('clusterUri', name='redshiftcluster_pkey') ) - op.create_table('group_member', - sa.Column('groupUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.Column('userRoleInGroup', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName', name='group_member_pkey') - ) op.create_table('dataset_table_profiling_job', sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), @@ -178,4 +108,56 @@ def downgrade(): sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), sa.PrimaryKeyConstraint('jobUri', name='dataset_table_profiling_job_pkey') ) + op.create_table('item_tags', + sa.Column('tagid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid', name='item_tags_pkey') + ) + op.create_table('redshiftcluster_dataset', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', 
sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetCopyEnabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', name='redshiftcluster_dataset_pkey') + ) + op.create_table('tag', + sa.Column('id', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tag', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) + op.create_table('dataset_quality_rule', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('ruleUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('query', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('logs', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('ruleUri', 
name='dataset_quality_rule_pkey') + ) + op.create_table('redshiftcluster_datasettable', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('enabled', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column('schema', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('databaseName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.Column('dataLocation', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri', + name='redshiftcluster_datasettable_pkey') + ) # ### end Alembic commands ### From a38816b0aa4cad3b1cd357ebe222be41809b6af4 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Tue, 13 Feb 2024 11:46:17 +0000 Subject: [PATCH 11/17] Resolve confusion about schema 'local' --- backend/migrations/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/migrations/README.md b/backend/migrations/README.md index 33a2baa4b..95d278f83 100644 --- a/backend/migrations/README.md +++ b/backend/migrations/README.md @@ -23,7 +23,7 @@ docker compose up ```bash export PYTHONPATH=backend ``` -3. The containers initiated in the first step will default to using the schema named `local`. Alembic relies on the environmental variable `envname` to determine the schema. Set it to `local` with the following command: +3. The containers initiated in the first step will default to using the schema named `dkrcompose`. 
To experiment freely with the database, create a new schema named `local`. Alembic relies on the environment variable `envname` to determine the schema. Set it to `local` with the following command: ```bash export envname=local ``` From 7c8bf4389f987cd6b70eec8af66507834ea29890 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Tue, 13 Feb 2024 14:45:04 +0000 Subject: [PATCH 12/17] fix make file --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a0dad26b..f5743e00c 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ generate-migrations: upgrade-pip install-backend alembic -c backend/alembic.ini upgrade head alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" --autogenerate - +clean: @rm -fr cdk_out/ @rm -fr dist/ @rm -fr htmlcov/ From d3c52c5f2fb8697a38152a2f30dfbdb22d49909a Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 22 Feb 2024 11:28:15 +0000 Subject: [PATCH 13/17] connection.py changes proposed by Noah --- backend/dataall/base/db/connection.py | 3 +-- backend/migrations/README.md | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/dataall/base/db/connection.py b/backend/dataall/base/db/connection.py index a0425f72f..7f0f527e5 100644 --- a/backend/dataall/base/db/connection.py +++ b/backend/dataall/base/db/connection.py @@ -122,9 +122,8 @@ def get_engine(envname=ENVNAME): 'schema': envname, } else: - hostname = 'db' if envname == 'dkrcompose' else 'localhost' db_params = { - 'host': hostname, + 'host': 'db' if envname == 'dkrcompose' and os.path.exists("/.dockerenv") else 'localhost', 'db': 'dataall', 'user': 'postgres', 'pwd': 'docker', diff --git a/backend/migrations/README.md b/backend/migrations/README.md index 95d278f83..39a02d09f 100644 --- a/backend/migrations/README.md +++ b/backend/migrations/README.md @@ -28,7 +28,10 @@ export PYTHONPATH=backend export envname=local ``` In a real-life RDS database, `envname` adopts the value of the
environment (e.g., dev, test, etc.). - +To apply the same migrations to the database schema used by local data.all deployments, use the schema `dkrcompose` instead: +```bash +export envname=dkrcompose +``` ## Managing migrations From d5a50358794cc8ed65dd7ec5bd4dbea259dd5f48 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 22 Feb 2024 13:13:30 +0000 Subject: [PATCH 14/17] Comments about additional model imports into env.py --- backend/migrations/README.md | 7 +++++++ backend/migrations/env.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/migrations/README.md b/backend/migrations/README.md index 39a02d09f..a7138d46b 100644 --- a/backend/migrations/README.md +++ b/backend/migrations/README.md @@ -63,6 +63,7 @@ alembic -c backend/alembic.ini upgrade head alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" --autogenerate ``` Please, change the auto-generated filename postfix with the short description of the migration purpose. +Always check autogenerated migration file to ensure that necessary changed are reflected there. ## What to know about autogenerated migrations: @@ -111,4 +112,10 @@ The type comparison logic is fully extensible as well; see [Comparing Types](htt - Some free-standing constraint additions and removals may not be supported, including PRIMARY KEY, EXCLUDE, CHECK; these are not necessarily implemented within the autogenerate detection system and also may not be supported by the supporting SQLAlchemy dialect. - Sequence additions, removals - not yet implemented. + +## Why didn't Alembic add my new models to the migration? +For a reason not yet identified, Alembic is 'blind' to some files containing model definitions. If your new model is not added to the migration file, +try import its class explicitly into file 'backend/migrations/env.py' under the line '# import additional models here'.
+ + https://alembic.sqlalchemy.org/en/latest/ \ No newline at end of file diff --git a/backend/migrations/env.py b/backend/migrations/env.py index 98e13a507..5b8504478 100644 --- a/backend/migrations/env.py +++ b/backend/migrations/env.py @@ -5,8 +5,9 @@ # DO NOT DELETE +# these models are not used directly in env.py, but these imports are important for alembic # import additional models here -# they are not used directly in env.py, but these imports are important for alembic + from dataall.modules.catalog.db.glossary_models import GlossaryNode, TermLink from dataall.modules.dashboards.db.dashboard_models import DashboardShare, Dashboard From 2bae05b7c7fcfa3e748cbbf3634bb43017603ed3 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 22 Feb 2024 16:36:54 +0000 Subject: [PATCH 15/17] Update 6c9a8afee4e4__alembic_sync.py rename migration in the file --- backend/migrations/versions/6c9a8afee4e4__alembic_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/migrations/versions/6c9a8afee4e4__alembic_sync.py b/backend/migrations/versions/6c9a8afee4e4__alembic_sync.py index 9bd4d8d28..9d7c0714f 100644 --- a/backend/migrations/versions/6c9a8afee4e4__alembic_sync.py +++ b/backend/migrations/versions/6c9a8afee4e4__alembic_sync.py @@ -1,4 +1,4 @@ -"""_describe_changes_shortly +"""alembic_sync Revision ID: 6c9a8afee4e4 Revises: f6cd4ba7dd8d From ef3577736415516c68c8dc609798476dc8e6d8e1 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 22 Feb 2024 16:37:58 +0000 Subject: [PATCH 16/17] Update Makefile remove "_" from "describe_changes_shortly" --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f5743e00c..de3b742d8 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ generate-migrations: upgrade-pip install-backend pip install 'alembic' export PYTHONPATH=./backend && \ alembic -c backend/alembic.ini upgrade head - alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" 
--autogenerate + alembic -c backend/alembic.ini revision -m "describe_changes_shortly" --autogenerate clean: @rm -fr cdk_out/ From 216b40d7a57f9ca23e2e59ab8bba939fc6512b3e Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 22 Feb 2024 16:39:14 +0000 Subject: [PATCH 17/17] Update README.md Line about 'rename the migration' in README --- backend/migrations/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/migrations/README.md b/backend/migrations/README.md index a7138d46b..3a775658a 100644 --- a/backend/migrations/README.md +++ b/backend/migrations/README.md @@ -62,7 +62,7 @@ or alembic -c backend/alembic.ini upgrade head alembic -c backend/alembic.ini revision -m "_describe_changes_shortly" --autogenerate ``` -Please, change the auto-generated filename postfix with the short description of the migration purpose. +Please replace the auto-generated filename postfix with a short description of the migration's purpose, and make the same change to the description on the first line of the migration file itself. Always check autogenerated migration file to ensure that necessary changed are reflected there. ## What to know about autogenerated migrations: @@ -118,4 +118,4 @@ For not yet detected reason alembic is 'blind' towards some files with models de try import its class explicitly into file 'backend/migrations/env.py' under the line '# import additional models here'. -https://alembic.sqlalchemy.org/en/latest/ \ No newline at end of file +https://alembic.sqlalchemy.org/en/latest/