From 96a5056ca084222667ce8b7d295771027a4bb3b9 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 14 May 2024 14:23:06 +0200 Subject: [PATCH 1/6] create db snapshot in migrations --- backend/dbmigrations_handler.py | 54 ++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/backend/dbmigrations_handler.py b/backend/dbmigrations_handler.py index 00d7c5c98..25206d3fd 100644 --- a/backend/dbmigrations_handler.py +++ b/backend/dbmigrations_handler.py @@ -4,15 +4,67 @@ import logging import os - +import datetime +import boto3 +import time from alembic import command +from alembic.script import ScriptDirectory +from alembic.migration import MigrationContext from alembic.config import Config +from dataall.base.db.connection import ENVNAME, get_engine logger = logging.getLogger() logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) def handler(event, context) -> None: + """ + This function will be called once upon every deployment. + It checks if there are any alembic migration scripts to execute. + If there are, it will create a snapshot of the database. + It executes the alembic migration scripts. + """ alembic_cfg = Config('alembic.ini') alembic_cfg.set_main_option('script_location', './migrations') + + # Get head version + script = ScriptDirectory.from_config(alembic_cfg) + head_rev = script.get_current_head() + + # Get current version from database + engine = get_engine(ENVNAME) + with engine.engine.connect() as connection: + context = MigrationContext.configure(connection) + current_rev = context.get_current_revision() + + if head_rev != current_rev: + snapshot_id = f'migration-{head_rev}-{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}' + cluster_id = engine.dbconfig.host.split('.')[0] + logger.info( + f'Creating RDS snapshot for cluster {cluster_id}, head revision {head_rev} is ahead of {current_rev}...' + ) + try: + rds_client = boto3.client('rds', region_name=os.getenv('AWS_REGION')) + cluster_id = 'res-fr-db' + cluster_status = '' + while cluster_status != 'available': + # Edge case in which the cluster is performing backup and/or maintenance operations. + # If it times out the CICD pipeline fails and needs to be retried. + logger.info(f'Waiting while the cluster is available, status={cluster_status}') + response = rds_client.describe_db_clusters(DBClusterIdentifier=cluster_id) + cluster_status = response['DBClusters'][0]['Status'] + time.sleep(30) + + rds_client.create_db_cluster_snapshot( + DBClusterSnapshotIdentifier=snapshot_id, + DBClusterIdentifier=cluster_id, + Tags=[ + {'Key': 'Application', 'Value': 'dataall'}, + ], + ) + except Exception as e: + logger.exception(f'Failed to create RDS snapshot: {e}') + raise Exception(f'Failed to create RDS snapshot: {e}') + + # Execute the alembic migration scripts command.upgrade(alembic_cfg, 'head') # logging breaks after this command From 0211414a3c11cb777f07a00e30485091f2f13190 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 14 May 2024 15:23:26 +0200 Subject: [PATCH 2/6] Add IAM permissions to db-migrations trigger function --- backend/dbmigrations_handler.py | 2 +- deploy/stacks/backend_stack.py | 10 ++++++++++ deploy/stacks/trigger_function_stack.py | 4 +++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/backend/dbmigrations_handler.py b/backend/dbmigrations_handler.py index 25206d3fd..a809c8c83 100644 --- a/backend/dbmigrations_handler.py +++ b/backend/dbmigrations_handler.py @@ -38,7 +38,7 @@ def handler(event, context) -> None: current_rev = context.get_current_revision() if head_rev != current_rev: - snapshot_id = f'migration-{head_rev}-{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}' + snapshot_id = f'dataall-migration-{head_rev}-{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}' cluster_id = engine.dbconfig.host.split('.')[0] logger.info( f'Creating RDS snapshot for cluster {cluster_id}, head revision {head_rev} is ahead of {current_rev}...' diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index 42206bc48..5e9e77539 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -322,6 +322,16 @@ def __init__( ecr_repository=repo, execute_after=[aurora_stack.cluster], connectables=[aurora_stack.cluster], + additional_policy_statements=[ + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=['rds:AddTagsToResource', 'rds:CreateDBClusterSnapshot', 'rds:DescribeDBClusters'], + resources=[ + f'arn:aws:rds:*:{self.account}:snapshot:dataall*', + f'arn:aws:rds:*:{self.account}:cluster:dataall*', + ], + ) + ], **kwargs, ) diff --git a/deploy/stacks/trigger_function_stack.py b/deploy/stacks/trigger_function_stack.py index 70924a474..69b4b5c15 100644 --- a/deploy/stacks/trigger_function_stack.py +++ b/deploy/stacks/trigger_function_stack.py @@ -27,6 +27,7 @@ def __init__( vpce_connection: ec2.IConnectable = None, connectables: List[ec2.IConnectable] = [], execute_after: List[Construct] = [], + additional_policy_statements: List[iam.PolicyStatement] = [], **kwargs, ): super().__init__(scope, id, **kwargs) @@ -38,13 +39,14 @@ def __init__( env = {'envname': envname, 'LOG_LEVEL': 'INFO'} function_sgs = self.create_lambda_sgs(envname, handler, resource_prefix, vpc) + policy_statements = self.get_policy_statements(resource_prefix).append(additional_policy_statements) self.trigger_function = TriggerFunction( self, f'TriggerFunction-{handler}', function_name=f'{resource_prefix}-{envname}-{handler.replace(".", "_")}', description=f'dataall {handler} trigger function', - initial_policy=self.get_policy_statements(resource_prefix), + initial_policy=policy_statements, code=_lambda.Code.from_ecr_image(repository=ecr_repository, tag=image_tag, cmd=[handler]), vpc=vpc, security_groups=[function_sgs], From 012451b44947452049f8b2865a4fdac41d48aca5 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 15 May 2024 11:14:53 +0200 Subject: [PATCH 3/6] Different way of concatenating policies --- deploy/stacks/trigger_function_stack.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/deploy/stacks/trigger_function_stack.py b/deploy/stacks/trigger_function_stack.py index 69b4b5c15..ced884052 100644 --- a/deploy/stacks/trigger_function_stack.py +++ b/deploy/stacks/trigger_function_stack.py @@ -39,14 +39,13 @@ def __init__( env = {'envname': envname, 'LOG_LEVEL': 'INFO'} function_sgs = self.create_lambda_sgs(envname, handler, resource_prefix, vpc) - policy_statements = self.get_policy_statements(resource_prefix).append(additional_policy_statements) - + statements = self.get_policy_statements(resource_prefix) + (additional_policy_statements or []) self.trigger_function = TriggerFunction( self, f'TriggerFunction-{handler}', function_name=f'{resource_prefix}-{envname}-{handler.replace(".", "_")}', description=f'dataall {handler} trigger function', - initial_policy=policy_statements, + initial_policy=statements, code=_lambda.Code.from_ecr_image(repository=ecr_repository, tag=image_tag, cmd=[handler]), vpc=vpc, security_groups=[function_sgs], From e72c76b0c7dd1b95d909e06974995054858defd3 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 15 May 2024 14:34:48 +0200 Subject: [PATCH 4/6] Issues in PR review --- backend/dbmigrations_handler.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/dbmigrations_handler.py b/backend/dbmigrations_handler.py index a809c8c83..4b55c9c26 100644 --- a/backend/dbmigrations_handler.py +++ b/backend/dbmigrations_handler.py @@ -45,14 +45,14 @@ def handler(event, context) -> None: ) try: rds_client = boto3.client('rds', region_name=os.getenv('AWS_REGION')) - cluster_id = 'res-fr-db' - cluster_status = '' - while cluster_status != 'available': - # Edge case in which the cluster is performing backup and/or maintenance operations. - # If it times out the CICD pipeline fails and needs to be retried. - logger.info(f'Waiting while the cluster is available, status={cluster_status}') - response = rds_client.describe_db_clusters(DBClusterIdentifier=cluster_id) - cluster_status = response['DBClusters'][0]['Status'] + # Edge case in which the cluster is performing backup and/or maintenance operations. + # If it times out the CICD pipeline fails and needs to be retried. + while ( + cluster_status := rds_client.describe_db_clusters(DBClusterIdentifier=cluster_id)['DbClusters'][0][ + 'Status' + ] + ) != 'available': + logger.info(f'Waiting while the cluster is available, {cluster_status=}') time.sleep(30) rds_client.create_db_cluster_snapshot( From ec17651744ba3655d337bf53a3cc10132dfb87c1 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 15 May 2024 15:23:59 +0200 Subject: [PATCH 5/6] ReadPermissions to DescribeDBClusters --- deploy/stacks/backend_stack.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index 5e9e77539..410043dc2 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -325,11 +325,16 @@ def __init__( additional_policy_statements=[ iam.PolicyStatement( effect=iam.Effect.ALLOW, - actions=['rds:AddTagsToResource', 'rds:CreateDBClusterSnapshot', 'rds:DescribeDBClusters'], + actions=['rds:AddTagsToResource', 'rds:CreateDBClusterSnapshot'], resources=[ f'arn:aws:rds:*:{self.account}:snapshot:dataall*', f'arn:aws:rds:*:{self.account}:cluster:dataall*', ], + ), + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=['rds:DescribeDBClusters'], + resources=["*"], ) ], **kwargs, From 4d5d8913130993474a8ff2edf6a31453280f0b2f Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 15 May 2024 15:23:59 +0200 Subject: [PATCH 6/6] Fix permissions + Split trigger function and move them to a dedicated location in backend --- backend/deployment_triggers/__init__.py | 0 .../dbmigrations_handler.py | 18 ++++++++++ .../dbsnapshots_handler.py} | 7 ++-- .../saveperms_handler.py | 0 deploy/stacks/backend_stack.py | 36 ++++++++++++++----- deploy/stacks/trigger_function_stack.py | 2 +- 6 files changed, 49 insertions(+), 14 deletions(-) create mode 100644 backend/deployment_triggers/__init__.py create mode 100644 backend/deployment_triggers/dbmigrations_handler.py rename backend/{dbmigrations_handler.py => deployment_triggers/dbsnapshots_handler.py} (89%) rename backend/{ => deployment_triggers}/saveperms_handler.py (100%) diff --git a/backend/deployment_triggers/__init__.py b/backend/deployment_triggers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/deployment_triggers/dbmigrations_handler.py b/backend/deployment_triggers/dbmigrations_handler.py new file mode 100644 index 000000000..00d7c5c98 --- /dev/null +++ b/backend/deployment_triggers/dbmigrations_handler.py @@ -0,0 +1,18 @@ +""" +The handler of this module will be called once upon every deployment +""" + +import logging +import os + +from alembic import command +from alembic.config import Config + +logger = logging.getLogger() +logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO')) + + +def handler(event, context) -> None: + alembic_cfg = Config('alembic.ini') + alembic_cfg.set_main_option('script_location', './migrations') + command.upgrade(alembic_cfg, 'head') # logging breaks after this command diff --git a/backend/dbmigrations_handler.py b/backend/deployment_triggers/dbsnapshots_handler.py similarity index 89% rename from backend/dbmigrations_handler.py rename to backend/deployment_triggers/dbsnapshots_handler.py index 4b55c9c26..5fea7d809 100644 --- a/backend/dbmigrations_handler.py +++ b/backend/deployment_triggers/dbsnapshots_handler.py @@ -38,7 +38,7 @@ def handler(event, context) -> None: current_rev = context.get_current_revision() if head_rev != current_rev: - snapshot_id = f'dataall-migration-{head_rev}-{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}' + snapshot_id = f'{os.environ.get("resource_prefix", "dataall")}-migration-{head_rev}-{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}' cluster_id = engine.dbconfig.host.split('.')[0] logger.info( f'Creating RDS snapshot for cluster {cluster_id}, head revision {head_rev} is ahead of {current_rev}...' @@ -48,7 +48,7 @@ def handler(event, context) -> None: # Edge case in which the cluster is performing backup and/or maintenance operations. # If it times out the CICD pipeline fails and needs to be retried. while ( - cluster_status := rds_client.describe_db_clusters(DBClusterIdentifier=cluster_id)['DbClusters'][0][ + cluster_status := rds_client.describe_db_clusters(DBClusterIdentifier=cluster_id)['DBClusters'][0][ 'Status' ] ) != 'available': @@ -65,6 +65,3 @@ def handler(event, context) -> None: except Exception as e: logger.exception(f'Failed to create RDS snapshot: {e}') raise Exception(f'Failed to create RDS snapshot: {e}') - - # Execute the alembic migration scripts - command.upgrade(alembic_cfg, 'head') # logging breaks after this command diff --git a/backend/saveperms_handler.py b/backend/deployment_triggers/saveperms_handler.py similarity index 100% rename from backend/saveperms_handler.py rename to backend/deployment_triggers/saveperms_handler.py diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index 5e9e77539..7b0a96709 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -310,10 +310,10 @@ def __init__( **kwargs, ) - db_migrations = TriggerFunctionStack( + db_snapshots = TriggerFunctionStack( self, - 'DbMigrations', - handler='dbmigrations_handler.handler', + 'DbSnapshots', + handler='deployment_triggers.dbsnapshots_handler.handler', envname=envname, resource_prefix=resource_prefix, vpc=vpc, @@ -325,20 +325,40 @@ def __init__( additional_policy_statements=[ iam.PolicyStatement( effect=iam.Effect.ALLOW, - actions=['rds:AddTagsToResource', 'rds:CreateDBClusterSnapshot', 'rds:DescribeDBClusters'], + actions=['rds:AddTagsToResource', 'rds:CreateDBClusterSnapshot'], resources=[ - f'arn:aws:rds:*:{self.account}:snapshot:dataall*', - f'arn:aws:rds:*:{self.account}:cluster:dataall*', + f'arn:aws:rds:*:{self.account}:cluster-snapshot:{resource_prefix}*', + f'arn:aws:rds:*:{self.account}:cluster:{resource_prefix}*', ], - ) + ), + iam.PolicyStatement( + effect=iam.Effect.ALLOW, + actions=['rds:DescribeDBClusters'], + resources=['*'], + ), ], **kwargs, ) + db_migrations = TriggerFunctionStack( + self, + 'DbMigrations', + handler='deployment_triggers.dbmigrations_handler.handler', + envname=envname, + resource_prefix=resource_prefix, + vpc=vpc, + vpce_connection=vpce_connection, + image_tag=image_tag, + ecr_repository=repo, + execute_after=[db_snapshots.trigger_function], + connectables=[aurora_stack.cluster], + **kwargs, + ) + TriggerFunctionStack( self, 'SavePerms', - handler='saveperms_handler.handler', + handler='deployment_triggers.saveperms_handler.handler', envname=envname, resource_prefix=resource_prefix, vpc=vpc, diff --git a/deploy/stacks/trigger_function_stack.py b/deploy/stacks/trigger_function_stack.py index ced884052..c7bbb6539 100644 --- a/deploy/stacks/trigger_function_stack.py +++ b/deploy/stacks/trigger_function_stack.py @@ -36,7 +36,7 @@ def __init__( image_tag = self.node.try_get_context('image_tag') image_tag = f'lambdas-{image_tag}' - env = {'envname': envname, 'LOG_LEVEL': 'INFO'} + env = {'envname': envname, 'resource_prefix': resource_prefix, 'LOG_LEVEL': 'INFO'} function_sgs = self.create_lambda_sgs(envname, handler, resource_prefix, vpc) statements = self.get_policy_statements(resource_prefix) + (additional_policy_statements or [])