diff --git a/.checkov.baseline b/.checkov.baseline
index 83829801d..e0bbfce12 100644
--- a/.checkov.baseline
+++ b/.checkov.baseline
@@ -33,16 +33,594 @@
]
},
{
- "file": "/deploy/cdk_exec_policy/cdkExecPolicy.yaml",
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackAurora5C370E8D.nested.template.json",
"findings": [
{
- "resource": "AWS::IAM::ManagedPolicy.CDKCustomExecutionPolicy0",
+ "resource": "AWS::RDS::DBCluster.AuroraDatabasedev25ACBF71",
+ "check_ids": [
+ "CKV_AWS_162"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackCognito10115F09.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWSCDKTriggerCustomResourceProviderCustomResourceProviderHandler97BECD91",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.CognitoParamsSyncHandlerdevCD2D1951",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.CognitoProviderdevframeworkonEvent2C5E785F",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.TriggerFunctionCognitoConfig4D80131A",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackDbMigrations2A0FEBE6.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWSCDKTriggerCustomResourceProviderCustomResourceProviderHandler97BECD91",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.TriggerFunctiondeploymenttriggersdbmigrationshandlerhandlerC889DF98",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackDbSnapshotsF1AA6385.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWSCDKTriggerCustomResourceProviderCustomResourceProviderHandler97BECD91",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.TriggerFunctiondeploymenttriggersdbsnapshotshandlerhandler31D769F8",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackECSA27D2427.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupcatalogindexerdev2B9059AB",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupcdkproxydev11E8A2C0",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupshareexpirationtaskdev40CB15AF",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupsharemanagerdev49FFAB5E",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupsharereapplierdev58405762",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupshareverifierdev877E0507",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupstacksupdaterdev2CE91D51",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGroupsubscriptionsdev7B40E2D8",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.ECSLogGrouptablessyncerdevAB12C7DE",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackLambdas4B1DE6AF.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::ApiGateway::Stage.dataalldevapiDeploymentStageprodE0313A5B",
+ "check_ids": [
+ "CKV_AWS_120"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.AWSWorkerAA1523CA",
+ "check_ids": [
+ "CKV_AWS_115"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.CustomAuthorizerFunctiondevB38B5CCB",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.ElasticSearchProxyHandlerDBDE7574",
+ "check_ids": [
+ "CKV_AWS_115"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.LambdaGraphQL1131F2C3",
+ "check_ids": [
+ "CKV_AWS_115"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.awsworkerloggroupF395EFD3",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.customauthorizerloggroup8F3B5B9D",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.dataalldevapigateway2625FE76",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.esproxyloggroup53203D0B",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.graphqlloggroupB3FAA5F5",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackOpenSearchA18441EC.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWS679f53fac002430cb0da5b7982bd22872D164C4C",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.EsAppLogGroupD5A3BCEA",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.OpenSearchDomaindevSlowIndexLogs6F7350C9",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::Logs::LogGroup.OpenSearchDomaindevSlowSearchLogs1F374004",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ },
+ {
+ "resource": "AWS::OpenSearchService::Domain.OpenSearchDomaindev1A616D02",
+ "check_ids": [
+ "CKV_AWS_317"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackParamStoreC907C0F0.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWS679f53fac002430cb0da5b7982bd22872D164C4C",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackS3Resources1DD771FA.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.CustomCDKBucketDeployment8693BB64968944B69AAFB0CC9EB8756C81C01536",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117",
+ "CKV_AWS_173"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.CustomS3AutoDeleteObjectsCustomResourceProviderHandler9D90184F",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.dataalldevaccesslogsAE3B6354",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackSavePermsE1C53B74.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWSCDKTriggerCustomResourceProviderCustomResourceProviderHandler97BECD91",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.TriggerFunctiondeploymenttriggerssavepermshandlerhandler04AEF392",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-backend-stage/dataallmaincicdstackdataalldevbackendstagebackendstackVpc2C1E1115.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Logs::LogGroup.dataalldevflowlogs44F8EC01",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-cloudfront-stage/dataallmaincicdstackdataalldevcloudfrontstagecloudfrontstackCloudFront118EAF32.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::CloudFront::Distribution.CloudFrontDistributionBA64CE3A",
+ "check_ids": [
+ "CKV_AWS_174"
+ ]
+ },
+ {
+ "resource": "AWS::CloudFront::Distribution.userguideDistribution9C9E7FE0",
+ "check_ids": [
+ "CKV_AWS_174"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.dataalldevhttpheadersredirection47B7A62B",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.dataalldevfrontend64065639",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.dataalldevlogging0F6723EE",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.dataalldevuserguide5964DC13",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/assembly-dataall-main-cicd-stack-dataall-dev-cloudfront-stage/dataallmaincicdstackdataalldevcloudfrontstagecloudfrontstackFrontendCognitoConfig345B272A.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Lambda::Function.AWSCDKTriggerCustomResourceProviderCustomResourceProviderHandler97BECD91",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.TriggerFunctionCognitoUrlsConfig9FD27FEB",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/asset.3045cb6b4340be1e173df6dcf6248d565aa849ceda3e2cf2c2f221ccee4bc1d6/pivotRole.yaml",
+ "findings": [
+ {
+ "resource": "AWS::IAM::ManagedPolicy.PivotRolePolicy0",
"check_ids": [
- "CKV_AWS_107",
"CKV_AWS_109",
- "CKV_AWS_110",
"CKV_AWS_111"
]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.PivotRolePolicy1",
+ "check_ids": [
+ "CKV_AWS_109"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/dataall-main-cicd-stack.template.json",
+ "findings": [
+ {
+ "resource": "AWS::IAM::Policy.dataallmaincdkpipelineAssetsFileRoleDefaultPolicyFCD7832D",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.CustomS3AutoDeleteObjectsCustomResourceProviderHandler9D90184F",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.pipelineartifactsbucketE44F7DE9",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.sourcecodebucket464EEFA3",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/dataallmaincicdstackAuroraB3A9A7CA.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::RDS::DBCluster.AuroraDatabasemainA7A8A785",
+ "check_ids": [
+ "CKV_AWS_162"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/cdk.out/dataallmaincicdstackVpcC62303B8.nested.template.json",
+ "findings": [
+ {
+ "resource": "AWS::Logs::LogGroup.dataallmainflowlogsEBED4B4A",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/checkov_environment_synth.json",
+ "findings": [
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy19AC37181",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy2E85AF510",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy306EE0E93",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicy5A19E75CA",
+ "check_ids": [
+ "CKV_AWS_109"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataallanothergroup111111servicespolicyCC720210",
+ "check_ids": [
+ "CKV_AWS_109"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy1A0C96958",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy2B12D381A",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy391D03768",
+ "check_ids": [
+ "CKV_AWS_111"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy3E3CBA9E",
+ "check_ids": [
+ "CKV_AWS_109"
+ ]
+ },
+ {
+ "resource": "AWS::IAM::ManagedPolicy.dataalltestadmins111111servicespolicy56D7DC525",
+ "check_ids": [
+ "CKV_AWS_109"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.CustomCDKBucketDeployment8693BB64968944B69AAFB0CC9EB8756C81C01536",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117",
+ "CKV_AWS_173"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.GlueDatabaseLFCustomResourceHandler7FAF0F82",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_117",
+ "CKV_AWS_173"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.LakeformationDefaultSettingsHandler2CBEDB06",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_117",
+ "CKV_AWS_173"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.dataallGlueDbCustomResourceProviderframeworkonEventF8347BA7",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117",
+ "CKV_AWS_173"
+ ]
+ },
+ {
+ "resource": "AWS::Lambda::Function.dataallLakeformationDefaultSettingsProviderframeworkonEventBB660E32",
+ "check_ids": [
+ "CKV_AWS_115",
+ "CKV_AWS_116",
+ "CKV_AWS_117",
+ "CKV_AWS_173"
+ ]
+ },
+ {
+ "resource": "AWS::S3::Bucket.EnvironmentDefaultBucket78C3A8B0",
+ "check_ids": [
+ "CKV_AWS_18"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/checkov_notebook_synth.json",
+ "findings": [
+ {
+ "resource": "AWS::SageMaker::NotebookInstance.Notebook111111",
+ "check_ids": [
+ "CKV2_AWS_68",
+ "CKV_AWS_371"
+ ]
+ }
+ ]
+ },
+ {
+ "file": "/checkov_smstudio_extension_synth.json",
+ "findings": [
+ {
+ "resource": "AWS::Logs::LogGroup.SageMakerStudiodev97911306",
+ "check_ids": [
+ "CKV_AWS_158"
+ ]
}
]
},
diff --git a/UserGuide.pdf b/UserGuide.pdf
index 08687e42f..04c8e9a7a 100644
Binary files a/UserGuide.pdf and b/UserGuide.pdf differ
diff --git a/backend/api_handler.py b/backend/api_handler.py
index 74559b1ac..e46113546 100644
--- a/backend/api_handler.py
+++ b/backend/api_handler.py
@@ -15,6 +15,7 @@
attach_tenant_policy_for_groups,
check_reauth,
validate_and_block_if_maintenance_window,
+ redact_creds,
)
from dataall.core.tasks.service_handlers import Worker
from dataall.base.aws.sqs import SqsQueue
@@ -83,6 +84,7 @@ def handler(event, context):
Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
"""
+ event = redact_creds(event)
log.info('Lambda Event %s', event)
log.debug('Env name %s', ENVNAME)
log.debug('Engine %s', ENGINE.engine.url)
@@ -140,8 +142,8 @@ def handler(event, context):
dispose_context()
response = json.dumps(response)
- log.info('Lambda Response %s', response)
-
+ log.info('Lambda Response Success: %s', success)
+ log.debug('Lambda Response %s', response)
return {
'statusCode': 200 if success else 400,
'headers': {
diff --git a/backend/aws_handler.py b/backend/aws_handler.py
index 8ad2b2157..2a2fa6d4b 100644
--- a/backend/aws_handler.py
+++ b/backend/aws_handler.py
@@ -7,7 +7,7 @@
from dataall.base.loader import load_modules, ImportMode
logger = logging.getLogger()
-logger.setLevel(os.environ.get('LOG_LEVEL'))
+logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
log = logging.getLogger(__name__)
ENVNAME = os.getenv('envname', 'local')
diff --git a/backend/dataall/base/aws/quicksight.py b/backend/dataall/base/aws/quicksight.py
index b9eda86d7..d25540a4c 100644
--- a/backend/dataall/base/aws/quicksight.py
+++ b/backend/dataall/base/aws/quicksight.py
@@ -3,8 +3,7 @@
from .sts import SessionHelper
-logger = logging.getLogger('QuicksightHandler')
-logger.setLevel(logging.DEBUG)
+logger = logging.getLogger(__name__)
class QuicksightClient:
diff --git a/backend/dataall/base/utils/api_handler_utils.py b/backend/dataall/base/utils/api_handler_utils.py
index fe445db3d..3bb3ec373 100644
--- a/backend/dataall/base/utils/api_handler_utils.py
+++ b/backend/dataall/base/utils/api_handler_utils.py
@@ -23,6 +23,16 @@
item.casefold() for item in ['getGroupsForUser', 'getMaintenanceWindowStatus']
]
ENGINE = get_engine(envname=ENVNAME)
+AWS_REGION = os.getenv('AWS_REGION')
+
+
+def redact_creds(event):
+ if event.get('headers', {}).get('Authorization'):
+ event['headers']['Authorization'] = 'XXXXXXXXXXXX'
+
+ if event.get('multiValueHeaders', {}).get('Authorization'):
+ event['multiValueHeaders']['Authorization'] = 'XXXXXXXXXXXX'
+ return event
def get_cognito_groups(claims):
@@ -106,7 +116,7 @@ def check_reauth(query, auth_time, username):
# Determine if there are any Operations that Require ReAuth From SSM Parameter
try:
reauth_apis = ParameterStoreManager.get_parameter_value(
- region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
+ region=AWS_REGION, parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
).split(',')
except Exception:
log.info('No ReAuth APIs Found in SSM')
diff --git a/backend/dataall/core/environment/cdk/environment_stack.py b/backend/dataall/core/environment/cdk/environment_stack.py
index 269ccae6a..ca4a27190 100644
--- a/backend/dataall/core/environment/cdk/environment_stack.py
+++ b/backend/dataall/core/environment/cdk/environment_stack.py
@@ -176,6 +176,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
versioned=True,
enforce_ssl=True,
)
+ default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
self.default_environment_bucket = default_environment_bucket
default_environment_bucket.add_to_resource_policy(
diff --git a/backend/dataall/core/environment/cdk/pivot_role_core_policies/iam.py b/backend/dataall/core/environment/cdk/pivot_role_core_policies/iam.py
index b84083bbf..bbd1bdddc 100644
--- a/backend/dataall/core/environment/cdk/pivot_role_core_policies/iam.py
+++ b/backend/dataall/core/environment/cdk/pivot_role_core_policies/iam.py
@@ -24,5 +24,12 @@ def get_statements(self):
f'arn:aws:iam::{self.account}:role/{self.role_name}',
],
),
+        # DENY to prevent the pivot role from granting itself additional permissions
+ iam.PolicyStatement(
+ sid='IAMDenyForPivotRole',
+ effect=iam.Effect.DENY,
+ actions=['iam:Put*', 'iam:Delete*', 'iam:Update*', 'iam:AttachRolePolicy', 'iam:DetachRolePolicy'],
+ resources=[f'arn:aws:iam::{self.account}:role/{self.role_name}'],
+ ),
]
return statements
diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py
index d8a1f5f94..795e3a63b 100644
--- a/backend/dataall/core/environment/services/environment_service.py
+++ b/backend/dataall/core/environment/services/environment_service.py
@@ -846,6 +846,7 @@ def list_all_active_environments(session) -> List[Environment]:
return environments
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
@ResourcePolicyService.has_resource_permission(environment_permissions.DELETE_ENVIRONMENT)
def delete_environment(uri):
with get_context().db_engine.scoped_session() as session:
@@ -927,6 +928,7 @@ def resolve_user_role(environment: Environment):
return EnvironmentPermission.NotInvited.value
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def enable_subscriptions(environmentUri: str = None, input: dict = None):
context = get_context()
with context.db_engine.scoped_session() as session:
@@ -962,6 +964,7 @@ def enable_subscriptions(environmentUri: str = None, input: dict = None):
return True
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def disable_subscriptions(environment_uri: str = None):
context = get_context()
with context.db_engine.scoped_session() as session:
@@ -1023,6 +1026,7 @@ def _get_environment_group_aws_session(session, username, groups, environment, g
return aws_session
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def get_environment_assume_role_url(
environmentUri: str = None,
groupUri: str = None,
@@ -1050,6 +1054,7 @@ def get_environment_assume_role_url(
return url
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def generate_environment_access_token(environmentUri: str = None, groupUri: str = None):
context = get_context()
with context.db_engine.scoped_session() as session:
diff --git a/backend/dataall/core/environment/tasks/env_stacks_updater.py b/backend/dataall/core/environment/tasks/env_stacks_updater.py
index 013702238..ecf6b72f9 100644
--- a/backend/dataall/core/environment/tasks/env_stacks_updater.py
+++ b/backend/dataall/core/environment/tasks/env_stacks_updater.py
@@ -13,10 +13,10 @@
from dataall.base.utils import Parameter
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
RETRIES = 30
SLEEP_TIME = 30
diff --git a/backend/dataall/core/organizations/services/organization_service.py b/backend/dataall/core/organizations/services/organization_service.py
index 739717e81..696ae0881 100644
--- a/backend/dataall/core/organizations/services/organization_service.py
+++ b/backend/dataall/core/organizations/services/organization_service.py
@@ -70,6 +70,7 @@ def create_organization(data):
return org
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_ORGANIZATIONS)
@ResourcePolicyService.has_resource_permission(UPDATE_ORGANIZATION)
def update_organization(uri, data):
context = get_context()
diff --git a/backend/dataall/core/permissions/services/group_policy_service.py b/backend/dataall/core/permissions/services/group_policy_service.py
index f26d704d8..9b2bb697d 100644
--- a/backend/dataall/core/permissions/services/group_policy_service.py
+++ b/backend/dataall/core/permissions/services/group_policy_service.py
@@ -45,6 +45,7 @@ def check_group_environment_permission(uri, group, permission_name):
permission_name=permission_name,
)
+ @staticmethod
def has_group_permission(permission):
def decorator(f):
@wraps(f)
diff --git a/backend/dataall/core/permissions/services/tenant_policy_service.py b/backend/dataall/core/permissions/services/tenant_policy_service.py
index 71823b3fa..2d1d1511b 100644
--- a/backend/dataall/core/permissions/services/tenant_policy_service.py
+++ b/backend/dataall/core/permissions/services/tenant_policy_service.py
@@ -337,6 +337,7 @@ def save_permissions_with_tenant(engine, envname=None):
TenantPolicyService.save_tenant(session, name=TenantPolicyService.TENANT_NAME, description='Tenant dataall')
PermissionService.init_permissions(session)
+ @staticmethod
def has_tenant_permission(permission: str):
"""
Decorator to check if a user has a permission to do some action.
diff --git a/backend/dataall/core/stacks/tasks/cdkproxy.py b/backend/dataall/core/stacks/tasks/cdkproxy.py
index 62262dbc9..198f80081 100644
--- a/backend/dataall/core/stacks/tasks/cdkproxy.py
+++ b/backend/dataall/core/stacks/tasks/cdkproxy.py
@@ -6,10 +6,10 @@
from dataall.base.db import get_engine
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
logger = logging.getLogger(__name__)
+logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
if __name__ == '__main__':
diff --git a/backend/dataall/modules/catalog/indexers/base_indexer.py b/backend/dataall/modules/catalog/indexers/base_indexer.py
index 98ce693e7..f0b8839d2 100644
--- a/backend/dataall/modules/catalog/indexers/base_indexer.py
+++ b/backend/dataall/modules/catalog/indexers/base_indexer.py
@@ -17,6 +17,7 @@ class BaseIndexer(ABC):
_INDEX = 'dataall-index'
_es = None
+ _QUERY_SIZE = 1000
@classmethod
def es(cls):
@@ -53,11 +54,31 @@ def _index(cls, doc_id, doc):
return False
@classmethod
- def search(cls, query):
+ def search_all(cls, query, sort):
+ all_results = []
+ search_after = None
+ while True:
+ if search_after:
+ query['search_after'] = search_after
+
+ response = BaseIndexer.search(query=query, sort=sort)
+ hits = response['hits']['hits']
+ if not hits:
+ break # No more results
+
+ all_results.extend(hits)
+
+ # Update search_after for the next iteration
+ search_after = hits[-1]['sort']
+
+ return all_results
+
+ @classmethod
+ def search(cls, query, sort=None):
es = cls.es()
if es:
- res = es.search(index=cls._INDEX, body=query)
- log.info(f'Search query {query} returned {res["hits"]["total"]["value"]} records')
+ res = es.search(index=cls._INDEX, body=query, sort=sort, size=cls._QUERY_SIZE)
+ log.info(f'Search query {query} found {res["hits"]["total"]["value"]} total records')
return res
else:
log.error(f'ES config is missing, search query {query} failed')
diff --git a/backend/dataall/modules/catalog/services/glossaries_service.py b/backend/dataall/modules/catalog/services/glossaries_service.py
index 7d522c449..92ba22142 100644
--- a/backend/dataall/modules/catalog/services/glossaries_service.py
+++ b/backend/dataall/modules/catalog/services/glossaries_service.py
@@ -106,6 +106,7 @@ def delete_node(uri: str = None):
return GlossaryRepository.delete_node(session=session, uri=uri)
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES)
def approve_term_association(linkUri: str):
with _session() as session:
return GlossaryRepository.approve_term_association(
@@ -113,6 +114,7 @@ def approve_term_association(linkUri: str):
)
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES)
def dismiss_term_association(linkUri: str):
with _session() as session:
return GlossaryRepository.dismiss_term_association(
diff --git a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py
index 7fd628465..032739db9 100644
--- a/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py
+++ b/backend/dataall/modules/catalog/tasks/catalog_indexer_task.py
@@ -10,10 +10,10 @@
from dataall.base.utils.alarm_service import AlarmService
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
class CatalogIndexerTask:
@@ -43,12 +43,10 @@ def _delete_old_objects(cls, indexed_object_uris: List[str]) -> None:
# Search for documents in opensearch without an ID in the indexed_object_uris list
query = {'query': {'bool': {'must_not': {'terms': {'_id': indexed_object_uris}}}}}
# Delete All "Outdated" Objects from Index
- docs = BaseIndexer.search(query)
- for doc in docs.get('hits', {}).get('hits', []):
- log.info(f'Deleting document {doc["_id"]}...')
+ docs = BaseIndexer.search_all(query, sort='_id')
+ for doc in docs:
BaseIndexer.delete_doc(doc_id=doc['_id'])
-
- log.info(f'Deleted {len(docs.get("hits", {}).get("hits", []))} records')
+ log.info(f'Deleted {len(docs)} records')
if __name__ == '__main__':
diff --git a/backend/dataall/modules/dashboards/api/types.py b/backend/dataall/modules/dashboards/api/types.py
index 1150043cb..857cf9333 100644
--- a/backend/dataall/modules/dashboards/api/types.py
+++ b/backend/dataall/modules/dashboards/api/types.py
@@ -19,17 +19,13 @@
gql.Field('DashboardId', type=gql.String),
gql.Field('tags', type=gql.ArrayType(gql.String)),
gql.Field('created', type=gql.String),
+ gql.Field('AwsAccountId', type=gql.String),
gql.Field('updated', type=gql.String),
gql.Field('owner', type=gql.String),
gql.Field('SamlGroupName', type=gql.String),
- gql.Field(
- 'organization',
- type=gql.Ref('Organization'),
- resolver=get_dashboard_organization,
- ),
gql.Field(
'environment',
- type=gql.Ref('Environment'),
+ type=gql.Ref('EnvironmentSimplified'),
resolver=resolve_environment,
),
gql.Field(
diff --git a/backend/dataall/modules/dashboards/aws/dashboard_quicksight_client.py b/backend/dataall/modules/dashboards/aws/dashboard_quicksight_client.py
index bf7380e30..77d89866f 100644
--- a/backend/dataall/modules/dashboards/aws/dashboard_quicksight_client.py
+++ b/backend/dataall/modules/dashboards/aws/dashboard_quicksight_client.py
@@ -9,7 +9,6 @@
from dataall.base.aws.secrets_manager import SecretsManager
log = logging.getLogger(__name__)
-log.setLevel(logging.DEBUG)
class DashboardQuicksightClient:
diff --git a/backend/dataall/modules/dashboards/services/dashboard_service.py b/backend/dataall/modules/dashboards/services/dashboard_service.py
index 010d6469a..34d6c3a34 100644
--- a/backend/dataall/modules/dashboards/services/dashboard_service.py
+++ b/backend/dataall/modules/dashboards/services/dashboard_service.py
@@ -25,7 +25,6 @@ class DashboardService:
"""Service that serves request related to dashboard"""
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_DASHBOARDS)
@ResourcePolicyService.has_resource_permission(GET_DASHBOARD)
def get_dashboard(uri: str) -> Dashboard:
with get_context().db_engine.scoped_session() as session:
diff --git a/backend/dataall/modules/datapipelines/services/datapipelines_service.py b/backend/dataall/modules/datapipelines/services/datapipelines_service.py
index 8da72dd16..de277d20d 100644
--- a/backend/dataall/modules/datapipelines/services/datapipelines_service.py
+++ b/backend/dataall/modules/datapipelines/services/datapipelines_service.py
@@ -180,7 +180,6 @@ def list_pipelines(*, filter: dict) -> dict:
)
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(GET_PIPELINE)
def get_pipeline(
uri: str,
@@ -202,6 +201,7 @@ def get_clone_url_http(uri: str):
return f'codecommit::{env.region}://{pipeline.repo}'
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(DELETE_PIPELINE)
def delete_pipeline(uri: str, deleteFromAWS: bool):
with _session() as session:
@@ -254,12 +254,14 @@ def _delete_repository(target_uri, accountid, cdk_role_arn, region, repo_name):
return True
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
def delete_pipeline_environment(envPipelineUri: str):
with _session() as session:
DatapipelinesRepository.delete_pipeline_environment(session=session, envPipelineUri=envPipelineUri)
return True
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_PIPELINE)
def get_credentials(uri):
with _session() as session:
diff --git a/backend/dataall/modules/datasets_base/api/types.py b/backend/dataall/modules/datasets_base/api/types.py
index 553cd61ec..36f2c8899 100644
--- a/backend/dataall/modules/datasets_base/api/types.py
+++ b/backend/dataall/modules/datasets_base/api/types.py
@@ -31,14 +31,9 @@
gql.Field(name='imported', type=gql.Boolean),
gql.Field(
name='environment',
- type=gql.Ref('Environment'),
+ type=gql.Ref('EnvironmentSimplified'),
resolver=get_dataset_environment,
),
- gql.Field(
- name='organization',
- type=gql.Ref('Organization'),
- resolver=get_dataset_organization,
- ),
gql.Field(
name='owners',
type=gql.String,
diff --git a/backend/dataall/modules/mlstudio/services/mlstudio_service.py b/backend/dataall/modules/mlstudio/services/mlstudio_service.py
index c47df205b..05593bebd 100644
--- a/backend/dataall/modules/mlstudio/services/mlstudio_service.py
+++ b/backend/dataall/modules/mlstudio/services/mlstudio_service.py
@@ -177,6 +177,7 @@ def create_sagemaker_studio_user(*, uri: str, admin_group: str, request: Sagemak
return sagemaker_studio_user
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
def update_sagemaker_studio_domain(environment, domain, data):
SagemakerStudioService._update_sagemaker_studio_domain_vpc(environment.AwsAccountId, environment.region, data)
domain.vpcType = data.get('vpcType')
@@ -205,6 +206,7 @@ def _update_sagemaker_studio_domain_vpc(account_id, region, data={}):
data['vpcType'] = 'created'
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
def create_sagemaker_studio_domain(session, environment, data: dict = {}):
SagemakerStudioService._update_sagemaker_studio_domain_vpc(environment.AwsAccountId, environment.region, data)
@@ -246,6 +248,7 @@ def get_sagemaker_studio_user_status(*, uri: str):
return status
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
@ResourcePolicyService.has_resource_permission(SGMSTUDIO_USER_URL)
def get_sagemaker_studio_user_presigned_url(*, uri: str):
with _session() as session:
@@ -259,6 +262,7 @@ def get_sagemaker_studio_user_applications(*, uri: str):
return sagemaker_studio_client(user).get_sagemaker_studio_user_applications()
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
@ResourcePolicyService.has_resource_permission(DELETE_SGMSTUDIO_USER)
def delete_sagemaker_studio_user(*, uri: str, delete_from_aws: bool):
"""Deletes SageMaker Studio user from the database and if delete_from_aws is True from AWS as well"""
diff --git a/backend/dataall/modules/notebooks/services/notebook_service.py b/backend/dataall/modules/notebooks/services/notebook_service.py
index 3a00e0984..26230938d 100644
--- a/backend/dataall/modules/notebooks/services/notebook_service.py
+++ b/backend/dataall/modules/notebooks/services/notebook_service.py
@@ -165,6 +165,7 @@ def get_notebook(*, uri) -> SagemakerNotebook:
return NotebookService._get_notebook(session, uri)
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_NOTEBOOKS)
@ResourcePolicyService.has_resource_permission(UPDATE_NOTEBOOK)
def start_notebook(*, uri):
"""Starts notebooks instance"""
@@ -172,6 +173,7 @@ def start_notebook(*, uri):
client(notebook).start_instance()
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_NOTEBOOKS)
@ResourcePolicyService.has_resource_permission(UPDATE_NOTEBOOK)
def stop_notebook(*, uri: str) -> None:
"""Stop notebook instance"""
@@ -179,6 +181,7 @@ def stop_notebook(*, uri: str) -> None:
client(notebook).stop_instance()
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_NOTEBOOKS)
@ResourcePolicyService.has_resource_permission(GET_NOTEBOOK)
def get_notebook_presigned_url(*, uri: str) -> str:
"""Creates and returns a presigned url for a notebook"""
@@ -193,6 +196,7 @@ def get_notebook_status(*, uri) -> str:
return client(notebook).get_notebook_instance_status()
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_NOTEBOOKS)
@ResourcePolicyService.has_resource_permission(DELETE_NOTEBOOK)
def delete_notebook(*, uri: str, delete_from_aws: bool):
"""Deletes notebook from the database and if delete_from_aws is True from AWS as well"""
diff --git a/backend/dataall/modules/notifications/db/notification_repositories.py b/backend/dataall/modules/notifications/db/notification_repositories.py
index 34ff7d1c6..4dd3b159a 100644
--- a/backend/dataall/modules/notifications/db/notification_repositories.py
+++ b/backend/dataall/modules/notifications/db/notification_repositories.py
@@ -3,7 +3,31 @@
from sqlalchemy import func, and_, or_
from dataall.modules.notifications.db import notification_models as models
-from dataall.base.db import paginate
+from dataall.base.db import paginate, exceptions
+from dataall.base.context import get_context
+from functools import wraps
+
+
+class NotificationAccess:
+ @staticmethod
+ def is_recipient(f):
+ @wraps(f)
+ def wrapper(*args, **kwds):
+ uri = kwds.get('notificationUri')
+ if not uri:
+ raise KeyError(f"{f.__name__} doesn't have parameter uri.")
+ context = get_context()
+ with context.db_engine.scoped_session() as session:
+ notification = session.query(models.Notification).get(uri)
+ if notification and (notification.recipient in context.groups + [context.username]):
+ return f(*args, **kwds)
+ else:
+ raise exceptions.UnauthorizedOperation(
+ action='UPDATE NOTIFICATION',
+ message=f'User {context.username} is not the recipient user/group of the notification {uri}',
+ )
+
+ return wrapper
class NotificationRepository:
@@ -88,6 +112,7 @@ def count_deleted_notifications(session, username, groups):
return int(count)
@staticmethod
+ @NotificationAccess.is_recipient
def read_notification(session, notificationUri):
notification = session.query(models.Notification).get(notificationUri)
notification.is_read = True
@@ -95,6 +120,7 @@ def read_notification(session, notificationUri):
return True
@staticmethod
+ @NotificationAccess.is_recipient
def delete_notification(session, notificationUri):
notification = session.query(models.Notification).get(notificationUri)
if notification:
diff --git a/backend/dataall/modules/notifications/handlers/notifications_handler.py b/backend/dataall/modules/notifications/handlers/notifications_handler.py
index ddb3edc37..68c85dbb2 100644
--- a/backend/dataall/modules/notifications/handlers/notifications_handler.py
+++ b/backend/dataall/modules/notifications/handlers/notifications_handler.py
@@ -5,7 +5,6 @@
from dataall.modules.notifications.services.ses_email_notification_service import SESEmailNotificationService
log = logging.getLogger(__name__)
-log.setLevel(logging.INFO)
class NotificationHandler:
diff --git a/backend/dataall/modules/omics/services/omics_service.py b/backend/dataall/modules/omics/services/omics_service.py
index 7f441c126..5523c460b 100644
--- a/backend/dataall/modules/omics/services/omics_service.py
+++ b/backend/dataall/modules/omics/services/omics_service.py
@@ -102,7 +102,6 @@ def get_omics_run_details_from_aws(uri: str):
return OmicsClient(awsAccountId=environment.AwsAccountId, region=environment.region).get_omics_run(uri)
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_OMICS_RUNS)
def get_omics_workflow(uri: str) -> dict:
"""Get Omics workflow."""
with _session() as session:
@@ -117,7 +116,6 @@ def get_omics_workflow(uri: str) -> dict:
return response
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_OMICS_RUNS)
def list_user_omics_runs(filter: dict) -> dict:
"""List existed user Omics runs. Filters only required omics_runs by the filter param"""
with _session() as session:
@@ -126,13 +124,13 @@ def list_user_omics_runs(filter: dict) -> dict:
)
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_OMICS_RUNS)
def list_omics_workflows(filter: dict) -> dict:
"""List Omics workflows."""
with _session() as session:
return OmicsRepository(session).paginated_omics_workflows(filter=filter)
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_OMICS_RUNS)
def delete_omics_runs(uris: List[str], delete_from_aws: bool) -> bool:
"""Deletes Omics runs from the database and if delete_from_aws is True from AWS as well"""
for uri in uris:
diff --git a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py
index 0db79e2c9..ecfff37db 100644
--- a/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py
+++ b/backend/dataall/modules/omics/tasks/omics_workflows_fetcher.py
@@ -12,10 +12,10 @@
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
def fetch_omics_workflows(engine):
diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py
index 842d66ffd..282a29833 100644
--- a/backend/dataall/modules/s3_datasets/api/dataset/types.py
+++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py
@@ -60,14 +60,9 @@
gql.Field(name='imported', type=gql.Boolean),
gql.Field(
name='environment',
- type=gql.Ref('Environment'),
+ type=gql.Ref('EnvironmentSimplified'),
resolver=get_dataset_environment,
),
- gql.Field(
- name='organization',
- type=gql.Ref('Organization'),
- resolver=get_dataset_organization,
- ),
gql.Field(
name='owners',
type=gql.String,
diff --git a/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py b/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py
index 0513e364d..2ffd07b57 100644
--- a/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py
+++ b/backend/dataall/modules/s3_datasets/cdk/assets/glueprofilingjob/glue_script.py
@@ -2,7 +2,7 @@
# workaround: SPARK_VERSION must be already set before import of pydeequ packages
# ruff: noqa: E402
-os.environ['SPARK_VERSION'] = '3.1'
+os.environ['SPARK_VERSION'] = '3.3'
import json
import logging
diff --git a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py
index fd6d0cf0b..bb5b51c6d 100644
--- a/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py
+++ b/backend/dataall/modules/s3_datasets/cdk/dataset_stack.py
@@ -120,6 +120,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
'DatasetKmsKey',
alias=dataset.KmsAlias,
enable_key_rotation=True,
+ removal_policy=RemovalPolicy.RETAIN,
policy=iam.PolicyDocument(
statements=[
iam.PolicyStatement(
@@ -172,6 +173,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
bucket_name=dataset.S3BucketName,
encryption=s3.BucketEncryption.KMS,
encryption_key=dataset_key,
+ removal_policy=RemovalPolicy.RETAIN,
cors=[
s3.CorsRule(
allowed_methods=[
@@ -197,6 +199,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
versioned=True,
bucket_key_enabled=True,
)
+ dataset_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
dataset_bucket.add_lifecycle_rule(
abort_incomplete_multipart_upload_after=Duration.days(7),
@@ -464,12 +467,12 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
'--apiUrl': 'None',
'--snsTopicArn': 'None',
'--extra-jars': (
- f's3://{env.EnvironmentDefaultBucketName}' f'/profiling/code/jars/deequ-2.0.0-spark-3.1.jar'
+ f's3://{env.EnvironmentDefaultBucketName}' f'/profiling/code/jars/deequ-2.0.7-spark-3.3.jar'
),
'--enable-metrics': 'true',
'--enable-continuous-cloudwatch-log': 'true',
'--enable-glue-datacatalog': 'true',
- '--SPARK_VERSION': '3.1',
+ '--SPARK_VERSION': '3.3',
}
job = glue.CfnJob(
@@ -486,7 +489,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
script_location=(f's3://{env.EnvironmentDefaultBucketName}' f'/profiling/code/glue_script.py'),
),
default_arguments=job_args,
- glue_version='3.0',
+ glue_version='4.0',
tags={'Application': 'dataall'},
)
if dataset.GlueProfilingTriggerSchedule:
diff --git a/backend/dataall/modules/s3_datasets/services/dataset_location_service.py b/backend/dataall/modules/s3_datasets/services/dataset_location_service.py
index a4ac2b33f..ee83d1c5f 100644
--- a/backend/dataall/modules/s3_datasets/services/dataset_location_service.py
+++ b/backend/dataall/modules/s3_datasets/services/dataset_location_service.py
@@ -53,14 +53,12 @@ def create_storage_location(uri: str, data: dict):
return location
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(LIST_DATASET_FOLDERS)
def list_dataset_locations(uri: str, filter: dict = None):
with get_context().db_engine.scoped_session() as session:
return DatasetLocationRepository.list_dataset_locations(session=session, uri=uri, data=filter)
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER)
def get_storage_location(uri):
with get_context().db_engine.scoped_session() as session:
diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py
index a8bdbc700..908e38551 100644
--- a/backend/dataall/modules/s3_datasets/services/dataset_service.py
+++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py
@@ -203,12 +203,12 @@ def create_dataset(uri, admin_group, data: dict):
return dataset
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
def import_dataset(uri, admin_group, data):
data['imported'] = True
return DatasetService.create_dataset(uri=uri, admin_group=admin_group, data=data)
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
def get_dataset(uri):
context = get_context()
with context.db_engine.scoped_session() as session:
@@ -218,6 +218,7 @@ def get_dataset(uri):
return dataset
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET)
def get_file_upload_presigned_url(uri: str, data: dict):
with get_context().db_engine.scoped_session() as session:
@@ -304,6 +305,7 @@ def get_dataset_statistics(dataset: S3Dataset):
}
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET)
def get_dataset_assume_role_url(uri):
context = get_context()
@@ -329,6 +331,7 @@ def get_dataset_assume_role_url(uri):
return url
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(CRAWL_DATASET)
def start_crawler(uri: str, data: dict = None):
engine = get_context().db_engine
@@ -360,6 +363,7 @@ def start_crawler(uri: str, data: dict = None):
}
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET)
def generate_dataset_access_token(uri):
with get_context().db_engine.scoped_session() as session:
@@ -377,6 +381,7 @@ def generate_dataset_access_token(uri):
return json.dumps(credentials)
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(DELETE_DATASET)
def delete_dataset(uri: str, delete_from_aws: bool = False):
context = get_context()
@@ -396,7 +401,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False):
DatasetIndexer.delete_doc(doc_id=uri)
DatasetService.execute_on_delete(session, uri, action=DELETE_DATASET)
- DatasetService.delete_dataset_term_links(session, uri)
+ DatasetService._delete_dataset_term_links(session, uri)
DatasetTableRepository.delete_dataset_tables(session, dataset.datasetUri)
DatasetLocationRepository.delete_dataset_locations(session, dataset.datasetUri)
DatasetBucketRepository.delete_dataset_buckets(session, dataset.datasetUri)
@@ -518,7 +523,7 @@ def _transfer_stewardship_to_new_stewards(session, dataset, new_stewards):
return dataset
@staticmethod
- def delete_dataset_term_links(session, dataset_uri):
+ def _delete_dataset_term_links(session, dataset_uri):
tables = [t.tableUri for t in DatasetRepository.get_dataset_tables(session, dataset_uri)]
for table_uri in tables:
GlossaryRepository.delete_glossary_terms_links(session, table_uri, 'DatasetTable')
diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py
index 804156912..021bfb37b 100644
--- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py
+++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py
@@ -37,7 +37,6 @@ def _get_dataset_uri(session, table_uri):
return table.datasetUri
@staticmethod
- @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
def get_table(uri: str):
with get_context().db_engine.scoped_session() as session:
return DatasetTableRepository.get_dataset_table_by_uri(session, uri)
@@ -107,6 +106,7 @@ def get_glue_table_properties(uri: str):
return json_utils.to_string(table.GlueTableProperties).replace('\\', ' ')
@classmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(SYNC_DATASET)
def sync_tables_for_dataset(cls, uri):
context = get_context()
diff --git a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py
index e530a9e6a..bfd4feaad 100644
--- a/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py
+++ b/backend/dataall/modules/s3_datasets/tasks/tables_syncer.py
@@ -17,10 +17,10 @@
from dataall.modules.s3_datasets.services.dataset_alarm_service import DatasetAlarmService
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
def sync_tables(engine):
diff --git a/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py b/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py
index e5a29c904..cf6a9ec05 100644
--- a/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py
+++ b/backend/dataall/modules/s3_datasets_shares/tasks/dataset_subscription_task.py
@@ -23,10 +23,10 @@
from dataall.modules.shares_base.services.share_notification_service import DataSharingNotificationType
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
# TODO: review this task usage and remove if not needed
diff --git a/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py b/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py
index 544345fe6..a122b8915 100644
--- a/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py
+++ b/backend/dataall/modules/s3_datasets_shares/tasks/subscriptions/sqs_poller.py
@@ -7,10 +7,10 @@
from botocore.exceptions import ClientError
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
ENVNAME = os.getenv('envname', 'local')
region = os.getenv('AWS_REGION', 'eu-west-1')
diff --git a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py
index 67a77145f..e9982c6c7 100644
--- a/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py
+++ b/backend/dataall/modules/shares_base/tasks/persistent_email_reminders_task.py
@@ -11,10 +11,10 @@
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
def persistent_email_reminders(engine):
diff --git a/backend/dataall/modules/shares_base/tasks/share_expiration_task.py b/backend/dataall/modules/shares_base/tasks/share_expiration_task.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/dataall/modules/shares_base/tasks/share_manager_task.py b/backend/dataall/modules/shares_base/tasks/share_manager_task.py
index 4048f0c5c..65da67ca4 100644
--- a/backend/dataall/modules/shares_base/tasks/share_manager_task.py
+++ b/backend/dataall/modules/shares_base/tasks/share_manager_task.py
@@ -7,10 +7,10 @@
from dataall.base.loader import load_modules, ImportMode
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
if __name__ == '__main__':
diff --git a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py
index 053d53dd2..32eebd5ab 100644
--- a/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py
+++ b/backend/dataall/modules/shares_base/tasks/share_reapplier_task.py
@@ -12,10 +12,10 @@
from dataall.base.loader import load_modules, ImportMode
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
class EcsBulkShareRepplyService:
diff --git a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py
index 046b56454..36c677b33 100644
--- a/backend/dataall/modules/shares_base/tasks/share_verifier_task.py
+++ b/backend/dataall/modules/shares_base/tasks/share_verifier_task.py
@@ -10,10 +10,10 @@
from dataall.base.loader import load_modules, ImportMode
root = logging.getLogger()
-root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
+log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
def verify_shares(engine):
diff --git a/backend/dataall/modules/worksheets/services/worksheet_service.py b/backend/dataall/modules/worksheets/services/worksheet_service.py
index b72efa367..128f2af94 100644
--- a/backend/dataall/modules/worksheets/services/worksheet_service.py
+++ b/backend/dataall/modules/worksheets/services/worksheet_service.py
@@ -66,6 +66,7 @@ def create_worksheet(session, username, data=None) -> Worksheet:
return worksheet
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS)
@ResourcePolicyService.has_resource_permission(UPDATE_WORKSHEET)
def update_worksheet(session, username, uri, data=None):
worksheet = WorksheetService.get_worksheet_by_uri(session, uri)
@@ -91,6 +92,7 @@ def get_worksheet(session, uri):
return worksheet
@staticmethod
+ @TenantPolicyService.has_tenant_permission(MANAGE_WORKSHEETS)
@ResourcePolicyService.has_resource_permission(DELETE_WORKSHEET)
def delete_worksheet(session, uri) -> bool:
worksheet = WorksheetService.get_worksheet_by_uri(session, uri)
diff --git a/backend/docker/prod/ecs/Dockerfile b/backend/docker/prod/ecs/Dockerfile
index 71478d3fc..ec45b32f9 100644
--- a/backend/docker/prod/ecs/Dockerfile
+++ b/backend/docker/prod/ecs/Dockerfile
@@ -2,7 +2,7 @@ FROM public.ecr.aws/amazonlinux/amazonlinux:2023
ARG NODE_VERSION=18
ARG NVM_VERSION=v0.37.2
-ARG DEEQU_VERSION=2.0.0-spark-3.1
+ARG DEEQU_VERSION=2.0.7-spark-3.3
ARG PYTHON_VERSION=python3.9
ARG CONTAINER_USER=cuser
ARG CONTAINER_USER_GROUP=cuser
diff --git a/backend/search_handler.py b/backend/search_handler.py
index 7985be272..ba38e45c8 100644
--- a/backend/search_handler.py
+++ b/backend/search_handler.py
@@ -1,21 +1,27 @@
import json
import os
+import logging
from dataall.base.context import RequestContext, set_context
from dataall.base.db import get_engine
from dataall.base.searchproxy import connect, run_query
-from dataall.base.utils.api_handler_utils import validate_and_block_if_maintenance_window, extract_groups
+from dataall.base.utils.api_handler_utils import validate_and_block_if_maintenance_window, extract_groups, redact_creds
from dataall.modules.maintenance.api.enums import MaintenanceModes
+logger = logging.getLogger()
+logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
+log = logging.getLogger(__name__)
+
ENVNAME = os.getenv('envname', 'local')
es = connect(envname=ENVNAME)
ENGINE = get_engine(envname=ENVNAME)
def handler(event, context):
- print('Received event')
- print(event)
+ event = redact_creds(event)
+ logger.info('Received event')
+ logger.info(event)
if event['httpMethod'] == 'OPTIONS':
return {
'statusCode': 200,
@@ -54,7 +60,7 @@ def handler(event, context):
return maintenance_window_validation_response
body = event.get('body')
- print(body)
+ logger.info(body)
success = True
try:
response = run_query(es, 'dataall-index', body)
diff --git a/deploy/custom_resources/cognito_config/cognito_urls.py b/deploy/custom_resources/cognito_config/cognito_urls.py
index 43be35c1f..7f0499f35 100644
--- a/deploy/custom_resources/cognito_config/cognito_urls.py
+++ b/deploy/custom_resources/cognito_config/cognito_urls.py
@@ -54,7 +54,7 @@ def setup_cognito(
updated_callbacks = existing_callbacks + list(set(config_callbacks) - set(existing_callbacks))
log.info(f'Updated CallBackUrls: {updated_callbacks}')
- config_logout_urls = [f'https://{signin_singout_link}']
+ config_logout_urls = [f'https://{signin_singout_link}', f'https://{user_guide_link}/']
existing_logout_urls = user_pool['UserPoolClient'].get('LogoutURLs', [])
updated_logout_urls = existing_logout_urls + list(set(config_logout_urls) - set(existing_logout_urls))
log.info(f'Updated LogOutUrls: {updated_logout_urls}')
diff --git a/deploy/custom_resources/custom_authorizer/auth_services.py b/deploy/custom_resources/custom_authorizer/auth_services.py
index f5991a22e..52efe14aa 100644
--- a/deploy/custom_resources/custom_authorizer/auth_services.py
+++ b/deploy/custom_resources/custom_authorizer/auth_services.py
@@ -28,7 +28,7 @@ def generate_policy(verified_claims: dict, effect, incoming_resource_str: str):
for claim_name, claim_value in verified_claims.items():
if isinstance(claim_value, list):
- verified_claims.update({claim_name: json.dumps(claim_value)})
+ verified_claims.update({claim_name: ','.join(claim_value)})
context = {**verified_claims}
diff --git a/deploy/custom_resources/custom_authorizer/custom_authorizer_lambda.py b/deploy/custom_resources/custom_authorizer/custom_authorizer_lambda.py
index ab710b1ae..47b9223e7 100644
--- a/deploy/custom_resources/custom_authorizer/custom_authorizer_lambda.py
+++ b/deploy/custom_resources/custom_authorizer/custom_authorizer_lambda.py
@@ -1,5 +1,6 @@
import logging
import os
+import json
from auth_services import AuthServices
from jwt_services import JWTServices
@@ -16,21 +17,33 @@
Custom Lambda Authorizer is attached to the API Gateway. Check the deploy/stacks/lambda_api.py for more details on deployment
"""
+OPENID_CONFIG_PATH = os.path.join(os.environ['custom_auth_url'], '.well-known', 'openid-configuration')
+JWT_SERVICE = JWTServices(OPENID_CONFIG_PATH)
+
def lambda_handler(incoming_event, context):
# Get the Token which is sent in the Authorization Header
+ logger.debug(incoming_event)
auth_token = incoming_event['headers']['Authorization']
if not auth_token:
- raise Exception('Unauthorized . Token not found')
+ raise Exception('Unauthorized. Missing JWT')
- verified_claims = JWTServices.validate_jwt_token(auth_token)
- logger.debug(verified_claims)
+ # Validate User is Active with Proper Access Token
+ user_info = JWT_SERVICE.validate_access_token(auth_token)
+
+ # Validate JWT
+ # Note: Removing the 7 Prefix Chars for 'Bearer ' from JWT
+ verified_claims = JWT_SERVICE.validate_jwt_token(auth_token[7:])
if not verified_claims:
raise Exception('Unauthorized. Token is not valid')
+ logger.debug(verified_claims)
+ # Generate Allow Policy w/ Context
effect = 'Allow'
+ verified_claims.update(user_info)
policy = AuthServices.generate_policy(verified_claims, effect, incoming_event['methodArn'])
- logger.debug('Generated policy is ', policy)
+ logger.debug(f'Generated policy is {json.dumps(policy)}')
+ print(f'Generated policy is {json.dumps(policy)}')
return policy
@@ -39,12 +52,13 @@ def lambda_handler(incoming_event, context):
# AWS Lambda and any other local environments
if __name__ == '__main__':
# for testing locally you can enter the JWT ID Token here
- token = ''
+ #
+ access_token = ''
account_id = ''
api_gw_id = ''
event = {
+ 'headers': {'Authorization': access_token},
'type': 'TOKEN',
- 'Authorization': token,
'methodArn': f'arn:aws:execute-api:us-east-1:{account_id}:{api_gw_id}/prod/POST/graphql/api',
}
lambda_handler(event, None)
diff --git a/deploy/custom_resources/custom_authorizer/jwt_services.py b/deploy/custom_resources/custom_authorizer/jwt_services.py
index 812a03f01..c1f2f6a5c 100644
--- a/deploy/custom_resources/custom_authorizer/jwt_services.py
+++ b/deploy/custom_resources/custom_authorizer/jwt_services.py
@@ -1,101 +1,81 @@
import os
import requests
-from jose import jwk
-from jose.jwt import get_unverified_header, decode, ExpiredSignatureError, JWTError
+import jwt
+
import logging
logger = logging.getLogger()
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
-# Configs required to fetch public keys from JWKS
-ISSUER_CONFIGS = {
- f'{os.environ.get("custom_auth_url")}': {
- 'jwks_uri': f'{os.environ.get("custom_auth_jwks_url")}',
- 'allowed_audiences': f'{os.environ.get("custom_auth_client")}',
- },
-}
-
-issuer_keys = {}
-
-
-# instead of re-downloading the public keys every time
-# we download them only on cold start
-# https://aws.amazon.com/blogs/compute/container-reuse-in-lambda/
-def fetch_public_keys():
- try:
- for issuer, issuer_config in ISSUER_CONFIGS.items():
- jwks_response = requests.get(issuer_config['jwks_uri'])
- jwks_response.raise_for_status()
- jwks: dict = jwks_response.json()
- for key in jwks['keys']:
- value = {
- 'issuer': issuer,
- 'audience': issuer_config['allowed_audiences'],
- 'jwk': jwk.construct(key),
- 'public_key': jwk.construct(key).public_key(),
- }
- issuer_keys.update({key['kid']: value})
- except Exception as e:
- raise Exception(f'Unable to fetch public keys due to {str(e)}')
-
-
-fetch_public_keys()
# Options to validate the JWT token
-# Only modification from default is to turn off verify_at_hash as we don't provide the access token for this validation
+# Only modification from default is to turn off verify_aud as Cognito Access Token does not provide this claim
jwt_options = {
'verify_signature': True,
- 'verify_aud': True,
+ 'verify_aud': False,
'verify_iat': True,
'verify_exp': True,
'verify_nbf': True,
'verify_iss': True,
'verify_sub': True,
'verify_jti': True,
- 'verify_at_hash': False,
- 'require_aud': True,
- 'require_iat': True,
- 'require_exp': True,
- 'require_nbf': False,
- 'require_iss': True,
- 'require_sub': True,
- 'require_jti': True,
- 'require_at_hash': False,
- 'leeway': 0,
+ 'require': ['iat', 'exp', 'iss', 'sub', 'jti'],
}
class JWTServices:
- @staticmethod
- def validate_jwt_token(jwt_token):
+ def __init__(self, openid_config_path):
+ # Get OpenID Config JSON
+ self.openid_config = self._fetch_openid_config(openid_config_path)
+
+ # Init pyJWT.JWKClient with JWK URI
+ self.jwks_client = jwt.PyJWKClient(self.openid_config.get('jwks_uri'))
+
+ def _fetch_openid_config(self, openid_config_path):
+ response = requests.get(openid_config_path)
+ response.raise_for_status()
+ return response.json()
+
+ def validate_jwt_token(self, jwt_token) -> dict:
try:
- # Decode and verify the JWT token
- header = get_unverified_header(jwt_token)
- kid = header['kid']
- if kid not in issuer_keys:
- logger.info('Public key not found in provided set of keys')
- # Retry Fetching the public certificates again in case rotation occurs and lambda has cached the publicKeys
- fetch_public_keys()
- if kid not in issuer_keys:
- raise Exception('Unauthorized')
- public_key = issuer_keys.get(kid)
- payload = decode(
+ # get signing_key from JWT
+ signing_key = self.jwks_client.get_signing_key_from_jwt(jwt_token)
+
+ # Decode and Verify JWT
+ payload = jwt.decode(
jwt_token,
- public_key.get('jwk'),
+ signing_key.key,
algorithms=['RS256', 'HS256'],
- issuer=public_key.get('issuer'),
- audience=public_key.get('audience'),
+ issuer=os.environ['custom_auth_url'],
+ audience=os.environ.get('custom_auth_client'),
+ leeway=0,
options=jwt_options,
)
+ # verify client_id if Cognito JWT
+ if 'client_id' in payload and payload['client_id'] != os.environ.get('custom_auth_client'):
+ raise Exception('Invalid Client ID in JWT Token')
+
+ # verify cid for other IdPs
+ if 'cid' in payload and payload['cid'] != os.environ.get('custom_auth_client'):
+ raise Exception('Invalid Client ID in JWT Token')
+
return payload
- except ExpiredSignatureError:
+ except jwt.exceptions.ExpiredSignatureError as e:
logger.error('JWT token has expired.')
- return None
- except JWTError as e:
+ raise e
+ except jwt.exceptions.PyJWTError as e:
logger.error(f'JWT token validation failed: {str(e)}')
- return None
+ raise e
except Exception as e:
logger.error(f'Failed to validate token - {str(e)}')
- return None
+ raise e
+
+ def validate_access_token(self, access_token) -> dict:
+ # get UserInfo URI from OpenId Configuration
+ user_info_url = self.openid_config.get('userinfo_endpoint')
+ r = requests.get(user_info_url, headers={'Authorization': access_token})
+ r.raise_for_status()
+ logger.debug(r.json())
+ return r.json()
diff --git a/deploy/custom_resources/custom_authorizer/requirements.txt b/deploy/custom_resources/custom_authorizer/requirements.txt
index 14e5c340e..db3720bed 100644
--- a/deploy/custom_resources/custom_authorizer/requirements.txt
+++ b/deploy/custom_resources/custom_authorizer/requirements.txt
@@ -1,10 +1,9 @@
certifi==2024.7.4
charset-normalizer==3.1.0
-ecdsa==0.18.0
idna==3.7
pyasn1==0.5.0
-python-jose==3.3.0
requests==2.32.2
rsa==4.9
six==1.16.0
-urllib3==1.26.19
\ No newline at end of file
+urllib3==1.26.19
+pyjwt==2.9.0
\ No newline at end of file
diff --git a/deploy/pivot_role/pivotRole.yaml b/deploy/pivot_role/pivotRole.yaml
index 4612161e6..908f9d9c1 100644
--- a/deploy/pivot_role/pivotRole.yaml
+++ b/deploy/pivot_role/pivotRole.yaml
@@ -447,6 +447,16 @@ Resources:
- !Sub 'arn:aws:iam::${AWS::AccountId}:policy/${EnvironmentResourcePrefix}*'
- !Sub 'arn:aws:iam::${AWS::AccountId}:policy/targetDatasetAccessControlPolicy'
- !Sub 'arn:aws:iam::${AWS::AccountId}:policy/dataall-targetDatasetS3Bucket-AccessControlPolicy'
+ - Sid: IAMRolePolicyDeny
+ Action:
+ - 'iam:Update*'
+ - 'iam:Delete*'
+ - 'iam:Put*'
+ - 'iam:AttachRolePolicy'
+ - 'iam:DetachRolePolicy'
+ Effect: Deny
+ Resource:
+ - !Sub 'arn:aws:iam::${AWS::AccountId}:role/${PivotRoleName}'
- Sid: IAMPassRole
Action:
- 'iam:PassRole'
diff --git a/deploy/requirements.txt b/deploy/requirements.txt
index 3ac23e4da..a67fb2621 100644
--- a/deploy/requirements.txt
+++ b/deploy/requirements.txt
@@ -3,4 +3,6 @@ boto3-stubs==1.20.20
boto3==1.28.23
botocore==1.31.23
cdk-nag==2.7.2
+typeguard==4.2.1
+cdk-klayers==0.3.0
constructs>=10.0.0,<11.0.0
diff --git a/deploy/stacks/auth_at_edge.py b/deploy/stacks/auth_at_edge.py
index 2076adaa4..b954a8a0f 100644
--- a/deploy/stacks/auth_at_edge.py
+++ b/deploy/stacks/auth_at_edge.py
@@ -23,7 +23,7 @@ def __init__(self, scope, id, envname='dev', resource_prefix='dataall', **kwargs
f'{resource_prefix}-{envname}-authatedge',
location={
'applicationId': 'arn:aws:serverlessrepo:us-east-1:520945424137:applications/cloudfront-authorization-at-edge',
- 'semanticVersion': '2.3.0',
+ 'semanticVersion': '2.3.2',
},
parameters={
'UserPoolArn': userpool_arn,
diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py
index dbfc94c9b..d7c95d0e2 100644
--- a/deploy/stacks/backend_stack.py
+++ b/deploy/stacks/backend_stack.py
@@ -43,6 +43,7 @@ def __init__(
vpc_endpoints_sg=None,
internet_facing=True,
custom_domain=None,
+ apigw_custom_domain=None,
ip_ranges=None,
apig_vpce=None,
prod_sizing=False,
@@ -190,12 +191,14 @@ def __init__(
apig_vpce=apig_vpce,
prod_sizing=prod_sizing,
user_pool=cognito_stack.user_pool if custom_auth is None else None,
+ user_pool_client=cognito_stack.client if custom_auth is None else None,
pivot_role_name=self.pivot_role_name,
reauth_ttl=reauth_config.get('ttl', 5) if reauth_config else 5,
email_notification_sender_email_id=email_sender,
email_custom_domain=ses_stack.ses_identity.email_identity_name if ses_stack is not None else None,
ses_configuration_set=ses_stack.configuration_set.configuration_set_name if ses_stack is not None else None,
custom_domain=custom_domain,
+ apigw_custom_domain=apigw_custom_domain,
custom_auth=custom_auth,
custom_waf_rules=custom_waf_rules,
**kwargs,
diff --git a/deploy/stacks/backend_stage.py b/deploy/stacks/backend_stage.py
index 361fd4d2b..400f95915 100644
--- a/deploy/stacks/backend_stage.py
+++ b/deploy/stacks/backend_stage.py
@@ -21,6 +21,7 @@ def __init__(
vpc_endpoints_sg=None,
internet_facing=True,
custom_domain=None,
+ apigw_custom_domain=None,
ip_ranges=None,
apig_vpce=None,
prod_sizing=False,
@@ -54,6 +55,7 @@ def __init__(
vpc_endpoints_sg=vpc_endpoints_sg,
internet_facing=internet_facing,
custom_domain=custom_domain,
+ apigw_custom_domain=apigw_custom_domain,
ip_ranges=ip_ranges,
apig_vpce=apig_vpce,
prod_sizing=prod_sizing,
diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py
index bbad9efa5..63d717a48 100644
--- a/deploy/stacks/container.py
+++ b/deploy/stacks/container.py
@@ -47,12 +47,13 @@ def __init__(
self._ecr_repository = ecr_repository
self._vpc = vpc
self._prod_sizing = prod_sizing
+ self._log_level = 'INFO' if prod_sizing else 'DEBUG'
(self.scheduled_tasks_sg, self.share_manager_sg) = self.create_ecs_security_groups(
envname, resource_prefix, vpc, vpce_connection, s3_prefix_list, lambdas
)
self.ecs_security_groups: [aws_ec2.SecurityGroup] = [self.scheduled_tasks_sg, self.share_manager_sg]
- self.env_vars = self._create_env('INFO')
+ self.env_vars = self._create_env()
# Check if custom domain exists and if it exists email notifications could be enabled.
# Create an env variable which stores the domain URL.
@@ -146,7 +147,7 @@ def __init__(
command=['python3.9', '-m', 'dataall.core.environment.tasks.env_stacks_updater'],
container_id='container',
ecr_repository=ecr_repository,
- environment=self._create_env('INFO'),
+ environment=self._create_env(),
image_tag=self._cdkproxy_image_tag,
log_group=self.create_log_group(envname, resource_prefix, log_group_name='stacks-updater'),
schedule_expression=Schedule.expression('cron(0 1 * * ? *)'),
@@ -201,7 +202,7 @@ def add_catalog_indexer_task(self):
command=['python3.9', '-m', 'dataall.modules.catalog.tasks.catalog_indexer_task'],
container_id=container_id,
ecr_repository=self._ecr_repository,
- environment=self._create_env('INFO'),
+ environment=self._create_env(),
image_tag=self._cdkproxy_image_tag,
log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='catalog-indexer'),
schedule_expression=Schedule.expression('rate(6 hours)'),
@@ -245,7 +246,7 @@ def add_share_management_task(self):
f'ShareManagementTaskContainer{self._envname}',
container_name='container',
image=ecs.ContainerImage.from_ecr_repository(repository=self._ecr_repository, tag=self._cdkproxy_image_tag),
- environment=self._create_env('DEBUG'),
+ environment=self._create_env(),
command=['python3.9', '-m', 'dataall.modules.shares_base.tasks.share_manager_task'],
logging=ecs.LogDriver.aws_logs(
stream_prefix='task',
@@ -276,7 +277,7 @@ def add_share_verifier_task(self):
command=['python3.9', '-m', 'dataall.modules.shares_base.tasks.share_verifier_task'],
container_id='container',
ecr_repository=self._ecr_repository,
- environment=self._create_env('INFO'),
+ environment=self._create_env(),
image_tag=self._cdkproxy_image_tag,
log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='share-verifier'),
schedule_expression=Schedule.expression('rate(7 days)'),
@@ -305,7 +306,7 @@ def add_share_reapplier_task(self):
f'ShareReapplierTaskContainer{self._envname}',
container_name='container',
image=ecs.ContainerImage.from_ecr_repository(repository=self._ecr_repository, tag=self._cdkproxy_image_tag),
- environment=self._create_env('INFO'),
+ environment=self._create_env(),
command=['python3.9', '-m', 'dataall.modules.shares_base.tasks.share_reapplier_task'],
logging=ecs.LogDriver.aws_logs(
stream_prefix='task',
@@ -367,7 +368,7 @@ def add_subscription_task(self):
],
container_id='container',
ecr_repository=self._ecr_repository,
- environment=self._create_env('INFO'),
+ environment=self._create_env(),
image_tag=self._cdkproxy_image_tag,
log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='subscriptions'),
schedule_expression=Schedule.expression('rate(15 minutes)'),
@@ -387,7 +388,7 @@ def add_sync_dataset_table_task(self):
command=['python3.9', '-m', 'dataall.modules.s3_datasets.tasks.tables_syncer'],
container_id='container',
ecr_repository=self._ecr_repository,
- environment=self._create_env('INFO'),
+ environment=self._create_env(),
image_tag=self._cdkproxy_image_tag,
log_group=self.create_log_group(self._envname, self._resource_prefix, log_group_name='tables-syncer'),
schedule_expression=Schedule.expression('rate(15 minutes)'),
@@ -407,7 +408,7 @@ def add_omics_fetch_workflows_task(self):
command=['python3.9', '-m', 'dataall.modules.omics.tasks.omics_workflows_fetcher'],
container_id='container',
ecr_repository=self._ecr_repository,
- environment=self._create_env('DEBUG'),
+ environment=self._create_env(),
image_tag=self._cdkproxy_image_tag,
log_group=self.create_log_group(
self._envname, self._resource_prefix, log_group_name='omics-workflows-fetcher'
@@ -740,10 +741,10 @@ def set_scheduled_task(
def ecs_task_role(self) -> iam.Role:
return self.task_role
- def _create_env(self, log_lvl) -> Dict:
+ def _create_env(self) -> Dict:
return {
'AWS_REGION': self.region,
'envname': self._envname,
- 'LOGLEVEL': log_lvl,
+ 'LOG_LEVEL': self._log_level,
'config_location': '/config.json',
}
diff --git a/deploy/stacks/lambda_api.py b/deploy/stacks/lambda_api.py
index fb8789154..797a9097f 100644
--- a/deploy/stacks/lambda_api.py
+++ b/deploy/stacks/lambda_api.py
@@ -14,12 +14,17 @@
aws_kms as kms,
aws_sqs as sqs,
aws_logs as logs,
+ aws_route53 as r53,
+ aws_route53_targets as r53_targets,
Duration,
CfnOutput,
Fn,
RemovalPolicy,
BundlingOptions,
)
+from cdk_klayers import Klayers
+from aws_cdk.aws_apigateway import DomainNameOptions, EndpointType, SecurityPolicy
+from aws_cdk.aws_certificatemanager import Certificate
from aws_cdk.aws_ec2 import (
InterfaceVpcEndpoint,
InterfaceVpcEndpointAwsService,
@@ -50,17 +55,22 @@ def __init__(
apig_vpce=None,
prod_sizing=False,
user_pool=None,
+ user_pool_client=None,
pivot_role_name=None,
reauth_ttl=5,
email_notification_sender_email_id=None,
email_custom_domain=None,
ses_configuration_set=None,
custom_domain=None,
+ apigw_custom_domain=None,
custom_auth=None,
**kwargs,
):
super().__init__(scope, id, **kwargs)
+ self.apigw_custom_domain = apigw_custom_domain
+ log_level = 'INFO' if prod_sizing else 'DEBUG'
+
if self.node.try_get_context('image_tag'):
image_tag = self.node.try_get_context('image_tag')
@@ -96,7 +106,7 @@ def __init__(
self.esproxy_dlq = self.set_dlq(f'{resource_prefix}-{envname}-esproxy-dlq')
esproxy_sg = self.create_lambda_sgs(envname, 'esproxy', resource_prefix, vpc)
- esproxy_env = {'envname': envname, 'LOG_LEVEL': 'INFO'}
+ esproxy_env = {'envname': envname, 'LOG_LEVEL': log_level}
if custom_auth:
esproxy_env['custom_auth'] = custom_auth.get('provider', None)
self.elasticsearch_proxy_handler = _lambda.DockerImageFunction(
@@ -125,7 +135,11 @@ def __init__(
self.api_handler_dlq = self.set_dlq(f'{resource_prefix}-{envname}-graphql-dlq')
api_handler_sg = self.create_lambda_sgs(envname, 'apihandler', resource_prefix, vpc)
- api_handler_env = {'envname': envname, 'LOG_LEVEL': 'INFO', 'REAUTH_TTL': str(reauth_ttl)}
+ api_handler_env = {
+ 'envname': envname,
+ 'LOG_LEVEL': log_level,
+ 'REAUTH_TTL': str(reauth_ttl)
+ }
# Check if custom domain exists and if it exists email notifications could be enabled. Create a env variable which stores the domain url. This is used for sending data.all share weblinks in the email notifications.
if custom_domain and custom_domain.get('hosted_zone_name', None):
api_handler_env['frontend_domain_url'] = f'https://{custom_domain.get("hosted_zone_name", None)}'
@@ -159,7 +173,7 @@ def __init__(
awsworker_sg = self.create_lambda_sgs(envname, 'awsworker', resource_prefix, vpc)
awshandler_env = {
'envname': envname,
- 'LOG_LEVEL': 'INFO',
+ 'LOG_LEVEL': log_level,
'email_sender_id': email_notification_sender_email_id,
}
self.aws_handler = _lambda.DockerImageFunction(
@@ -204,72 +218,94 @@ def __init__(
)
)
- if custom_auth is not None:
- # Create the custom authorizer lambda
- custom_authorizer_assets = os.path.realpath(
- os.path.join(
- os.path.dirname(__file__),
- '..',
- 'custom_resources',
- 'custom_authorizer',
- )
+ # Create the custom authorizer lambda
+ custom_authorizer_assets = os.path.realpath(
+ os.path.join(
+ os.path.dirname(__file__),
+ '..',
+ 'custom_resources',
+ 'custom_authorizer',
)
+ )
+ ## GET COGNITO USER POOL ID and APP CLIENT ID
- if not os.path.isdir(custom_authorizer_assets):
- raise Exception(f'Custom Authorizer Folder not found at {custom_authorizer_assets}')
+ if not os.path.isdir(custom_authorizer_assets):
+ raise Exception(f'Custom Authorizer Folder not found at {custom_authorizer_assets}')
- custom_lambda_env = {
- 'envname': envname,
- 'LOG_LEVEL': 'DEBUG',
- 'custom_auth_provider': custom_auth.get('provider'),
- 'custom_auth_url': custom_auth.get('url'),
- 'custom_auth_client': custom_auth.get('client_id'),
- 'custom_auth_jwks_url': custom_auth.get('jwks_url'),
- }
+ custom_lambda_env = {
+ 'envname': envname,
+ 'LOG_LEVEL': log_level,
+ }
+ if custom_auth:
+ custom_lambda_env.update(
+ {
+ 'custom_auth_provider': custom_auth.get('provider'),
+ 'custom_auth_url': custom_auth.get('url'),
+ 'custom_auth_client': custom_auth.get('client_id'),
+ }
+ )
for claims_map in custom_auth.get('claims_mapping', {}):
custom_lambda_env[claims_map] = custom_auth.get('claims_mapping', '').get(claims_map, '')
+ else:
+ custom_lambda_env.update(
+ {
+ 'custom_auth_provider': 'Cognito',
+ 'custom_auth_url': f'https://cognito-idp.{self.region}.amazonaws.com/{user_pool.user_pool_id}',
+ 'custom_auth_client': user_pool_client.user_pool_client_id,
+ 'email': 'email',
+ 'user_id': 'email',
+ }
+ )
+
+ # Initialize Klayers
+ runtime = _lambda.Runtime.PYTHON_3_9
+ klayers = Klayers(self, python_version=runtime, region=self.region)
- authorizer_fn_sg = self.create_lambda_sgs(envname, 'customauthorizer', resource_prefix, vpc)
- self.authorizer_fn = _lambda.Function(
+ # get the latest layer version for the cryptography package
+ cryptography_layer = klayers.layer_version(self, 'cryptography')
+
+ authorizer_fn_sg = self.create_lambda_sgs(envname, 'customauthorizer', resource_prefix, vpc)
+ self.authorizer_fn = _lambda.Function(
+ self,
+ f'CustomAuthorizerFunction-{envname}',
+ function_name=f'{resource_prefix}-{envname}-custom-authorizer',
+ log_group=logs.LogGroup(
self,
- f'CustomAuthorizerFunction-{envname}',
- function_name=f'{resource_prefix}-{envname}-custom-authorizer',
- log_group=logs.LogGroup(
- self,
- 'customauthorizerloggroup',
- log_group_name=f'/aws/lambda/{resource_prefix}-{envname}-custom-authorizer',
- ),
- handler='custom_authorizer_lambda.lambda_handler',
- code=_lambda.Code.from_asset(
- path=custom_authorizer_assets,
- bundling=BundlingOptions(
- image=_lambda.Runtime.PYTHON_3_9.bundling_image,
- local=SolutionBundling(source_path=custom_authorizer_assets),
- ),
+ 'customauthorizerloggroup',
+ log_group_name=f'/aws/lambda/{resource_prefix}-{envname}-custom-authorizer',
+ ),
+ handler='custom_authorizer_lambda.lambda_handler',
+ code=_lambda.Code.from_asset(
+ path=custom_authorizer_assets,
+ bundling=BundlingOptions(
+ image=_lambda.Runtime.PYTHON_3_9.bundling_image,
+ local=SolutionBundling(source_path=custom_authorizer_assets),
),
- memory_size=512 if prod_sizing else 256,
- description='dataall Custom authorizer replacing cognito authorizer',
- timeout=Duration.seconds(20),
- environment=custom_lambda_env,
- environment_encryption=lambda_env_key,
- vpc=vpc,
- security_groups=[authorizer_fn_sg],
- runtime=_lambda.Runtime.PYTHON_3_9,
- )
+ ),
+ memory_size=512 if prod_sizing else 256,
+ description='dataall Custom authorizer replacing cognito authorizer',
+ timeout=Duration.seconds(20),
+ environment=custom_lambda_env,
+ environment_encryption=lambda_env_key,
+ vpc=vpc,
+ security_groups=[authorizer_fn_sg],
+ runtime=runtime,
+ layers=[cryptography_layer],
+ )
- # Add NAT Connectivity For Custom Authorizer Lambda
- self.authorizer_fn.connections.allow_to(
- ec2.Peer.any_ipv4(), ec2.Port.tcp(443), 'Allow NAT Internet Access SG Egress'
- )
+ # Add NAT Connectivity For Custom Authorizer Lambda
+ self.authorizer_fn.connections.allow_to(
+ ec2.Peer.any_ipv4(), ec2.Port.tcp(443), 'Allow NAT Internet Access SG Egress'
+ )
- # Store custom authorizer's ARN in ssm
- ssm.StringParameter(
- self,
- f'{resource_prefix}-{envname}-custom-authorizer-arn',
- parameter_name=f'/dataall/{envname}/customauth/customauthorizerarn',
- string_value=self.authorizer_fn.function_arn,
- )
+ # Store custom authorizer's ARN in ssm
+ ssm.StringParameter(
+ self,
+ f'{resource_prefix}-{envname}-custom-authorizer-arn',
+ parameter_name=f'/dataall/{envname}/customauth/customauthorizerarn',
+ string_value=self.authorizer_fn.function_arn,
+ )
# Add VPC Endpoint Connectivity
if vpce_connection:
@@ -560,41 +596,31 @@ def set_up_graphql_api_gateway(
user_pool,
custom_auth,
):
- if custom_auth is None:
- cognito_authorizer = apigw.CognitoUserPoolsAuthorizer(
- self,
- 'CognitoAuthorizer',
- cognito_user_pools=[user_pool],
- authorizer_name=f'{resource_prefix}-{envname}-cognito-authorizer',
- identity_source='method.request.header.Authorization',
- results_cache_ttl=Duration.minutes(60),
- )
- else:
- # Create a custom Authorizer
- custom_authorizer_role = iam.Role(
- self,
- f'{resource_prefix}-{envname}-custom-authorizer-role',
- role_name=f'{resource_prefix}-{envname}-custom-authorizer-role',
- assumed_by=iam.ServicePrincipal('apigateway.amazonaws.com'),
- description='Allow Custom Authorizer to call custom auth lambda',
- )
- custom_authorizer_role.add_to_policy(
- iam.PolicyStatement(
- effect=iam.Effect.ALLOW,
- actions=['lambda:InvokeFunction'],
- resources=[self.authorizer_fn.function_arn],
- )
+ # Create a custom Authorizer
+ custom_authorizer_role = iam.Role(
+ self,
+ f'{resource_prefix}-{envname}-custom-authorizer-role',
+ role_name=f'{resource_prefix}-{envname}-custom-authorizer-role',
+ assumed_by=iam.ServicePrincipal('apigateway.amazonaws.com'),
+ description='Allow Custom Authorizer to call custom auth lambda',
+ )
+ custom_authorizer_role.add_to_policy(
+ iam.PolicyStatement(
+ effect=iam.Effect.ALLOW,
+ actions=['lambda:InvokeFunction'],
+ resources=[self.authorizer_fn.function_arn],
)
+ )
- custom_authorizer = apigw.RequestAuthorizer(
- self,
- 'CustomAuthorizer',
- handler=self.authorizer_fn,
- identity_sources=[apigw.IdentitySource.header('Authorization')],
- authorizer_name=f'{resource_prefix}-{envname}-custom-authorizer',
- assume_role=custom_authorizer_role,
- results_cache_ttl=Duration.minutes(60),
- )
+ custom_authorizer = apigw.RequestAuthorizer(
+ self,
+ 'CustomAuthorizer',
+ handler=self.authorizer_fn,
+ identity_sources=[apigw.IdentitySource.header('Authorization')],
+ authorizer_name=f'{resource_prefix}-{envname}-custom-authorizer',
+ assume_role=custom_authorizer_role,
+ results_cache_ttl=Duration.minutes(1),
+ )
if not internet_facing:
if apig_vpce:
api_vpc_endpoint = InterfaceVpcEndpoint.from_interface_vpc_endpoint_attributes(
@@ -642,6 +668,7 @@ def set_up_graphql_api_gateway(
types=[apigw.EndpointType.PRIVATE], vpc_endpoints=[api_vpc_endpoint]
),
policy=api_policy,
+ disable_execute_api_endpoint=bool(self.apigw_custom_domain),
)
else:
gw = apigw.RestApi(
@@ -649,8 +676,35 @@ def set_up_graphql_api_gateway(
backend_api_name,
rest_api_name=backend_api_name,
deploy_options=api_deploy_options,
+ disable_execute_api_endpoint=bool(self.apigw_custom_domain),
+ )
+
+ if self.apigw_custom_domain:
+ certificate = Certificate.from_certificate_arn(
+ self, 'CustomDomainCertificate', self.apigw_custom_domain['certificate_arn']
)
- api_url = gw.url
+ gw.add_domain_name(
+ 'ApiGwCustomDomainName',
+ certificate=certificate,
+ domain_name=self.apigw_custom_domain['hosted_zone_name'],
+ endpoint_type=EndpointType.EDGE if internet_facing else EndpointType.PRIVATE,
+ security_policy=SecurityPolicy.TLS_1_2,
+ )
+ r53.ARecord(
+ self,
+ 'ApiGwARecordId',
+ zone=r53.HostedZone.from_hosted_zone_attributes(
+ self,
+ 'ApiGwHostedZoneId',
+ hosted_zone_id=self.apigw_custom_domain['hosted_zone_id'],
+ zone_name=self.apigw_custom_domain['hosted_zone_name'],
+ ),
+ target=r53.RecordTarget.from_alias(r53_targets.ApiGateway(gw)),
+ )
+ api_url = f'https://{gw.domain_name.domain_name}/'
+ else:
+ api_url = gw.url
+
integration = apigw.LambdaIntegration(api_handler)
request_validator = apigw.RequestValidator(
self,
@@ -701,10 +755,8 @@ def set_up_graphql_api_gateway(
)
graphql_proxy.add_method(
'POST',
- authorizer=cognito_authorizer if custom_auth is None else custom_authorizer,
- authorization_type=apigw.AuthorizationType.COGNITO
- if custom_auth is None
- else apigw.AuthorizationType.CUSTOM,
+ authorizer=custom_authorizer,
+ authorization_type=apigw.AuthorizationType.CUSTOM,
request_validator=request_validator,
request_models={'application/json': graphql_validation_model},
)
@@ -743,10 +795,8 @@ def set_up_graphql_api_gateway(
)
search_proxy.add_method(
'POST',
- authorizer=cognito_authorizer if custom_auth is None else custom_authorizer,
- authorization_type=apigw.AuthorizationType.COGNITO
- if custom_auth is None
- else apigw.AuthorizationType.CUSTOM,
+ authorizer=custom_authorizer,
+ authorization_type=apigw.AuthorizationType.CUSTOM,
request_validator=request_validator,
request_models={'application/json': search_validation_model},
)
diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py
index 11f77288e..8664ae269 100644
--- a/deploy/stacks/pipeline.py
+++ b/deploy/stacks/pipeline.py
@@ -635,6 +635,7 @@ def set_backend_stage(self, target_env, repository_name):
vpc_restricted_nacls=target_env.get('vpc_restricted_nacl', False),
internet_facing=target_env.get('internet_facing', True),
custom_domain=target_env.get('custom_domain'),
+ apigw_custom_domain=target_env.get('apigw_custom_domain'),
ip_ranges=target_env.get('ip_ranges'),
apig_vpce=target_env.get('apig_vpce'),
prod_sizing=target_env.get('prod_sizing', True),
@@ -685,6 +686,8 @@ def set_approval_tests_stage(
'aws sts get-caller-identity --profile buildprofile',
f'export COGNITO_CLIENT=$(aws ssm get-parameter --name /dataall/{target_env["envname"]}/cognito/appclient --profile buildprofile --output text --query "Parameter.Value")',
f'export API_ENDPOINT=$(aws ssm get-parameter --name /dataall/{target_env["envname"]}/apiGateway/backendUrl --profile buildprofile --output text --query "Parameter.Value")',
+ f'export IDP_DOMAIN_URL=https://$(aws ssm get-parameter --name /dataall/{target_env["envname"]}/cognito/domain --profile buildprofile --output text --query "Parameter.Value").auth.{target_env["region"]}.amazoncognito.com',
+ f'export DATAALL_DOMAIN_URL=https://$(aws ssm get-parameter --region us-east-1 --name /dataall/{target_env["envname"]}/CloudfrontDistributionDomainName --profile buildprofile --output text --query "Parameter.Value")',
f'export TESTDATA=$(aws ssm get-parameter --name /dataall/{target_env["envname"]}/testdata --profile buildprofile --output text --query "Parameter.Value")',
f'export ENVNAME={target_env["envname"]}',
f'export AWS_REGION={target_env["region"]}',
diff --git a/documentation/userguide/docs/environments.md b/documentation/userguide/docs/environments.md
index 8a4c41cb3..3bbd92d15 100644
--- a/documentation/userguide/docs/environments.md
+++ b/documentation/userguide/docs/environments.md
@@ -45,6 +45,9 @@ cdk bootstrap --trust DATA.ALL_AWS_ACCOUNT_NUMBER -c @aws-cdk/core:newStyleStac
````bash
cdk bootstrap --trust 222222222222 -c @aws-cdk/core:newStyleStackSynthesis=true --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess aws://333333333333/eu-west-1
````
+
+!!! danger "After deleting an environment it is strongly recommended to untrust the data.all infrastructure account. Read more [here](#delete-an-environment)"
+
#### Restricted CDK Execution role
In the above command we define the `--cloudformation-execution-policies` to use the AdministratorAccess policy `arn:aws:iam::aws:policy/AdministratorAccess`.
This is the default policy that CDK uses to deploy resources, nevertheless it is possible to restrict it to any IAM policy created in the account.
@@ -235,6 +238,16 @@ In the chosen environment, next to the Edit button, click on the **Delete** butt
the delete display. Don't ignore it! Before deleting an environment, clean it up: delete its datasets and other
resources.
+!!! danger "Untrust *data.all* infrastructure account"
+ A message like this one: *"After removal users must untrust the data.all account manually from env account CDKToolkit stack!"* appears in
+ the delete display. Don't ignore it!
+ When you [bootstrapped](#1-cdk-bootstrap) the environment account you explicitly "trusted" (using the `--trust ` flag) the infrastructure
+ account to make deployments to your account.
+
+ * If you don't want to make CDK deployments (not necessarily related to data.all) to that account/region you can completely remove the CDKToolkit stack from CFN
+
+ * If you want to continue using the account/region for other CDK deployments you must untrust the data.all account by rerunning `cdk bootstrap --trust <remaining-trusted-account-1> --trust <remaining-trusted-account-2> ...`, omitting the data.all infrastructure account from the `--trust` list
+
Note that we can keep the environment CloudFormation stack. What is this for? This is useful in case you want to keep
using the environment resources (IAM roles, etc) created by *data.all* but outside of *data.all*
diff --git a/documentation/userguide/docs/js/extra.js b/documentation/userguide/docs/js/extra.js
new file mode 100644
index 000000000..924f1be01
--- /dev/null
+++ b/documentation/userguide/docs/js/extra.js
@@ -0,0 +1,20 @@
+document.addEventListener('DOMContentLoaded', function() {
+ var header = document.querySelector('.md-header-nav');
+ if (header) {
+ var button = document.createElement('a');
+ button.textContent = 'Sign Out';
+ button.className = 'md-header-nav__button md-icon signout-button';
+
+ // Button Click Event Listener
+ button.addEventListener('click', function(event) {
+ event.preventDefault();
+ try {
+ // Handle sign-out logic here
+ window.location.href = '/signout';
+ } catch (error) {
+ console.error('Error during sign-out:', error);
+ }
+ });
+ header.appendChild(button);
+ }
+});
diff --git a/documentation/userguide/mkdocs.yml b/documentation/userguide/mkdocs.yml
index b4f66db6d..aa836ec7b 100644
--- a/documentation/userguide/mkdocs.yml
+++ b/documentation/userguide/mkdocs.yml
@@ -68,3 +68,6 @@ markdown_extensions:
extra_css:
- assets/extra.css
+
+extra_javascript:
+ - js/extra.js
\ No newline at end of file
diff --git a/frontend/src/authentication/components/MaintenanceGuard.js b/frontend/src/authentication/components/MaintenanceGuard.js
index 0aa21f833..6d81acf33 100644
--- a/frontend/src/authentication/components/MaintenanceGuard.js
+++ b/frontend/src/authentication/components/MaintenanceGuard.js
@@ -1,6 +1,6 @@
import PropTypes from 'prop-types';
import { useEffect, useState } from 'react';
-import { isModuleEnabled, ModuleNames } from 'utils';
+import { isModuleEnabled, isTenantUser, ModuleNames } from 'utils';
import { useClient, useGroups } from 'services';
import { LoadingScreen, NoAccessMaintenanceWindow } from 'design';
import { getMaintenanceStatus } from '../../modules/Maintenance/services';
@@ -25,7 +25,7 @@ export const MaintenanceGuard = (props) => {
response.data.getMaintenanceWindowStatus.status
) &&
response.data.getMaintenanceWindowStatus.mode === 'NO-ACCESS' &&
- !groups.includes('DAAdministrators')
+ !isTenantUser(groups)
) {
setNoAccessMaintenanceFlag(true);
} else {
diff --git a/frontend/src/authentication/contexts/GenericAuthContext.js b/frontend/src/authentication/contexts/GenericAuthContext.js
index 07fbcc4df..d1fec575b 100644
--- a/frontend/src/authentication/contexts/GenericAuthContext.js
+++ b/frontend/src/authentication/contexts/GenericAuthContext.js
@@ -84,7 +84,8 @@ export const GenericAuthProvider = (props) => {
email: user.email,
name: user.email,
id_token: user.id_token,
- short_id: user.short_id
+ short_id: user.short_id,
+ access_token: user.access_token
}
}
});
@@ -129,7 +130,8 @@ export const GenericAuthProvider = (props) => {
email: user.email,
name: user.email,
id_token: user.id_token,
- short_id: user.short_id
+ short_id: user.short_id,
+ access_token: user.access_token
}
}
});
@@ -178,6 +180,7 @@ export const GenericAuthProvider = (props) => {
process.env.REACT_APP_CUSTOM_AUTH_EMAIL_CLAIM_MAPPING
],
id_token: auth.user.id_token,
+ access_token: auth.user.access_token,
short_id:
auth.user.profile[
process.env.REACT_APP_CUSTOM_AUTH_USERID_CLAIM_MAPPING
@@ -188,6 +191,7 @@ export const GenericAuthProvider = (props) => {
return {
email: user.attributes.email,
id_token: user.signInUserSession.idToken.jwtToken,
+ access_token: user.signInUserSession.accessToken.jwtToken,
short_id: 'none'
};
}
@@ -240,7 +244,7 @@ export const GenericAuthProvider = (props) => {
}
});
} else {
- await Auth.signOut();
+ await Auth.signOut({ global: true });
dispatch({
type: 'LOGOUT',
payload: {
@@ -271,7 +275,7 @@ export const GenericAuthProvider = (props) => {
console.error('Failed to ReAuth', error);
}
} else {
- await Auth.signOut();
+ await Auth.signOut({ global: true });
dispatch({
type: 'REAUTH',
payload: {
diff --git a/frontend/src/authentication/hooks/useToken.js b/frontend/src/authentication/hooks/useToken.js
index 08cfd5668..8b17536db 100644
--- a/frontend/src/authentication/hooks/useToken.js
+++ b/frontend/src/authentication/hooks/useToken.js
@@ -20,14 +20,14 @@ export const useToken = () => {
if (!auth.user) {
await auth.signinSilent();
}
- const t = auth.user.id_token;
+ const t = auth.user.access_token;
setToken(t);
} catch (error) {
if (!auth) throw Error('User Token Not Found !');
}
} else {
const session = await Auth.currentSession();
- const t = await session.getIdToken().getJwtToken();
+ const t = await session.getAccessToken().getJwtToken();
setToken(t);
}
} catch (error) {
diff --git a/frontend/src/design/components/DeleteObjectWithFrictionModal.js b/frontend/src/design/components/DeleteObjectWithFrictionModal.js
index 1956d6a2e..fa626ab58 100644
--- a/frontend/src/design/components/DeleteObjectWithFrictionModal.js
+++ b/frontend/src/design/components/DeleteObjectWithFrictionModal.js
@@ -22,6 +22,7 @@ export const DeleteObjectWithFrictionModal = (props) => {
open,
deleteFunction,
isAWSResource = true,
+ confirmMessage = 'permanently delete',
...other
} = props;
const [confirmValue, setConfirmValue] = useState(null);
@@ -72,14 +73,15 @@ export const DeleteObjectWithFrictionModal = (props) => {
)}
- To confirm deletion, type permanently delete in the text
- input field.
+ To confirm deletion, type {confirmMessage} in the text input
+ field.
{
}
color="error"
type="submit"
@@ -114,5 +116,6 @@ DeleteObjectWithFrictionModal.propTypes = {
onClose: PropTypes.func,
deleteFunction: PropTypes.func.isRequired,
open: PropTypes.bool.isRequired,
- isAWSResource: PropTypes.bool
+ isAWSResource: PropTypes.bool,
+ confirmMessage: PropTypes.string
};
diff --git a/frontend/src/design/components/popovers/AccountPopover.js b/frontend/src/design/components/popovers/AccountPopover.js
index eefacaa72..4772991b7 100644
--- a/frontend/src/design/components/popovers/AccountPopover.js
+++ b/frontend/src/design/components/popovers/AccountPopover.js
@@ -16,6 +16,7 @@ import { useGroups } from 'services';
import { CogIcon } from '../../icons';
import { TextAvatar } from '../TextAvatar';
import { useAuth } from 'authentication';
+import { isTenantUser } from 'utils';
export const AccountPopover = () => {
const anchorRef = useRef(null);
@@ -84,7 +85,7 @@ export const AccountPopover = () => {
- {groups && groups.indexOf('DAAdministrators') !== -1 && (
+ {isTenantUser(groups) && (