Skip to content

Commit

Permalink
2.6.1 Security features (#1686)
Browse files Browse the repository at this point in the history
### Feature or Bugfix
- Security

### Detail
* get-parameter CloudfrontDistributionDomainName from us-east-1 (#1687 )
* Added Token Validations (#1682)
* add warning to untrust data.all account when removing an environment
(#1685)
* add custom domain support for apigw (#1679)
* Lambda Event Logs Handling (#1678)
* Upgrade Spark version to 3.3 (#1675) -
a0c63a4
* ES Search Query Collect All Response  (#1631)
* Extend Tenant Perms Coverage (#1630)
* Limit Response info dataset queries (#1665)
* Add Removal Policy Retain to Bucket Policy IaC (#1660) 
* log API handler response only for LOG_LEVEL DEBUG. Set log level INFO
for prod deployments (#1662)
* Add permission checks to markNotificationAsRead + deleteNotification
(#1654)
* Added error view and unified utility to check tenant user (#1657
* Userguide signout flow (#1629)

### Relates
- Security release

### Security
Please answer the questions below briefly where applicable, or write
`N/A`. Based on
[OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this
includes
fetching data from storage outside the application (e.g. a database, an
S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you
consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires
authorization?
- How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use a standard proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Co-authored-by: Noah Paige <[email protected]>
Co-authored-by: Petros Kalos <[email protected]>
  • Loading branch information
3 people authored Nov 8, 2024
1 parent 5e421fe commit 9f087ff
Show file tree
Hide file tree
Showing 91 changed files with 1,211 additions and 387 deletions.
586 changes: 582 additions & 4 deletions .checkov.baseline

Large diffs are not rendered by default.

Binary file modified UserGuide.pdf
Binary file not shown.
6 changes: 4 additions & 2 deletions backend/api_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
attach_tenant_policy_for_groups,
check_reauth,
validate_and_block_if_maintenance_window,
redact_creds,
)
from dataall.core.tasks.service_handlers import Worker
from dataall.base.aws.sqs import SqsQueue
Expand Down Expand Up @@ -83,6 +84,7 @@ def handler(event, context):
Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
"""

event = redact_creds(event)
log.info('Lambda Event %s', event)
log.debug('Env name %s', ENVNAME)
log.debug('Engine %s', ENGINE.engine.url)
Expand Down Expand Up @@ -140,8 +142,8 @@ def handler(event, context):
dispose_context()
response = json.dumps(response)

log.info('Lambda Response %s', response)

log.info('Lambda Response Success: %s', success)
log.debug('Lambda Response %s', response)
return {
'statusCode': 200 if success else 400,
'headers': {
Expand Down
2 changes: 1 addition & 1 deletion backend/aws_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dataall.base.loader import load_modules, ImportMode

logger = logging.getLogger()
logger.setLevel(os.environ.get('LOG_LEVEL'))
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
log = logging.getLogger(__name__)

ENVNAME = os.getenv('envname', 'local')
Expand Down
3 changes: 1 addition & 2 deletions backend/dataall/base/aws/quicksight.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

from .sts import SessionHelper

logger = logging.getLogger('QuicksightHandler')
logger.setLevel(logging.DEBUG)
logger = logging.getLogger(__name__)


class QuicksightClient:
Expand Down
12 changes: 11 additions & 1 deletion backend/dataall/base/utils/api_handler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@
item.casefold() for item in ['getGroupsForUser', 'getMaintenanceWindowStatus']
]
ENGINE = get_engine(envname=ENVNAME)
AWS_REGION = os.getenv('AWS_REGION')


def redact_creds(event):
if event.get('headers', {}).get('Authorization'):
event['headers']['Authorization'] = 'XXXXXXXXXXXX'

if event.get('multiValueHeaders', {}).get('Authorization'):
event['multiValueHeaders']['Authorization'] = 'XXXXXXXXXXXX'
return event


def get_cognito_groups(claims):
Expand Down Expand Up @@ -106,7 +116,7 @@ def check_reauth(query, auth_time, username):
# Determine if there are any Operations that Require ReAuth From SSM Parameter
try:
reauth_apis = ParameterStoreManager.get_parameter_value(
region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
region=AWS_REGION, parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
).split(',')
except Exception:
log.info('No ReAuth APIs Found in SSM')
Expand Down
1 change: 1 addition & 0 deletions backend/dataall/core/environment/cdk/environment_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
versioned=True,
enforce_ssl=True,
)
default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
self.default_environment_bucket = default_environment_bucket

default_environment_bucket.add_to_resource_policy(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,12 @@ def get_statements(self):
f'arn:aws:iam::{self.account}:role/{self.role_name}',
],
),
# DENY to prevent pivot role to grant itself permissions
iam.PolicyStatement(
sid='IAMDenyForPivotRole',
effect=iam.Effect.DENY,
actions=['iam:Put*', 'iam:Delete*', 'iam:Update*', 'iam:AttachRolePolicy', 'iam:DetachRolePolicy'],
resources=[f'arn:aws:iam::{self.account}:role/{self.role_name}'],
),
]
return statements
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,7 @@ def list_all_active_environments(session) -> List[Environment]:
return environments

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
@ResourcePolicyService.has_resource_permission(environment_permissions.DELETE_ENVIRONMENT)
def delete_environment(uri):
with get_context().db_engine.scoped_session() as session:
Expand Down Expand Up @@ -927,6 +928,7 @@ def resolve_user_role(environment: Environment):
return EnvironmentPermission.NotInvited.value

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def enable_subscriptions(environmentUri: str = None, input: dict = None):
context = get_context()
with context.db_engine.scoped_session() as session:
Expand Down Expand Up @@ -962,6 +964,7 @@ def enable_subscriptions(environmentUri: str = None, input: dict = None):
return True

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def disable_subscriptions(environment_uri: str = None):
context = get_context()
with context.db_engine.scoped_session() as session:
Expand Down Expand Up @@ -1023,6 +1026,7 @@ def _get_environment_group_aws_session(session, username, groups, environment, g
return aws_session

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def get_environment_assume_role_url(
environmentUri: str = None,
groupUri: str = None,
Expand Down Expand Up @@ -1050,6 +1054,7 @@ def get_environment_assume_role_url(
return url

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def generate_environment_access_token(environmentUri: str = None, groupUri: str = None):
context = get_context()
with context.db_engine.scoped_session() as session:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from dataall.base.utils import Parameter

root = logging.getLogger()
root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))

RETRIES = 30
SLEEP_TIME = 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def create_organization(data):
return org

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ORGANIZATIONS)
@ResourcePolicyService.has_resource_permission(UPDATE_ORGANIZATION)
def update_organization(uri, data):
context = get_context()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def check_group_environment_permission(uri, group, permission_name):
permission_name=permission_name,
)

@staticmethod
def has_group_permission(permission):
def decorator(f):
@wraps(f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def save_permissions_with_tenant(engine, envname=None):
TenantPolicyService.save_tenant(session, name=TenantPolicyService.TENANT_NAME, description='Tenant dataall')
PermissionService.init_permissions(session)

@staticmethod
def has_tenant_permission(permission: str):
"""
Decorator to check if a user has a permission to do some action.
Expand Down
2 changes: 1 addition & 1 deletion backend/dataall/core/stacks/tasks/cdkproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from dataall.base.db import get_engine

root = logging.getLogger()
root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))


if __name__ == '__main__':
Expand Down
27 changes: 24 additions & 3 deletions backend/dataall/modules/catalog/indexers/base_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class BaseIndexer(ABC):

_INDEX = 'dataall-index'
_es = None
_QUERY_SIZE = 1000

@classmethod
def es(cls):
Expand Down Expand Up @@ -53,11 +54,31 @@ def _index(cls, doc_id, doc):
return False

@classmethod
def search(cls, query):
def search_all(cls, query, sort):
all_results = []
search_after = None
while True:
if search_after:
query['search_after'] = search_after

response = BaseIndexer.search(query=query, sort=sort)
hits = response['hits']['hits']
if not hits:
break # No more results

all_results.extend(hits)

# Update search_after for the next iteration
search_after = hits[-1]['sort']

return all_results

@classmethod
def search(cls, query, sort=None):
es = cls.es()
if es:
res = es.search(index=cls._INDEX, body=query)
log.info(f'Search query {query} returned {res["hits"]["total"]["value"]} records')
res = es.search(index=cls._INDEX, body=query, sort=sort, size=cls._QUERY_SIZE)
log.info(f'Search query {query} found {res["hits"]["total"]["value"]} total records')
return res
else:
log.error(f'ES config is missing, search query {query} failed')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,15 @@ def delete_node(uri: str = None):
return GlossaryRepository.delete_node(session=session, uri=uri)

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES)
def approve_term_association(linkUri: str):
with _session() as session:
return GlossaryRepository.approve_term_association(
session=session, username=get_context().username, groups=get_context().groups, linkUri=linkUri
)

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES)
def dismiss_term_association(linkUri: str):
with _session() as session:
return GlossaryRepository.dismiss_term_association(
Expand Down
10 changes: 4 additions & 6 deletions backend/dataall/modules/catalog/tasks/catalog_indexer_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from dataall.base.utils.alarm_service import AlarmService

root = logging.getLogger()
root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))


class CatalogIndexerTask:
Expand Down Expand Up @@ -43,12 +43,10 @@ def _delete_old_objects(cls, indexed_object_uris: List[str]) -> None:
# Search for documents in opensearch without an ID in the indexed_object_uris list
query = {'query': {'bool': {'must_not': {'terms': {'_id': indexed_object_uris}}}}}
# Delete All "Outdated" Objects from Index
docs = BaseIndexer.search(query)
for doc in docs.get('hits', {}).get('hits', []):
log.info(f'Deleting document {doc["_id"]}...')
docs = BaseIndexer.search_all(query, sort='_id')
for doc in docs:
BaseIndexer.delete_doc(doc_id=doc['_id'])

log.info(f'Deleted {len(docs.get("hits", {}).get("hits", []))} records')
log.info(f'Deleted {len(docs)} records')


if __name__ == '__main__':
Expand Down
8 changes: 2 additions & 6 deletions backend/dataall/modules/dashboards/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,13 @@
gql.Field('DashboardId', type=gql.String),
gql.Field('tags', type=gql.ArrayType(gql.String)),
gql.Field('created', type=gql.String),
gql.Field('AwsAccountId', type=gql.String),
gql.Field('updated', type=gql.String),
gql.Field('owner', type=gql.String),
gql.Field('SamlGroupName', type=gql.String),
gql.Field(
'organization',
type=gql.Ref('Organization'),
resolver=get_dashboard_organization,
),
gql.Field(
'environment',
type=gql.Ref('Environment'),
type=gql.Ref('EnvironmentSimplified'),
resolver=resolve_environment,
),
gql.Field(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from dataall.base.aws.secrets_manager import SecretsManager

log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)


class DashboardQuicksightClient:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class DashboardService:
"""Service that serves request related to dashboard"""

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_DASHBOARDS)
@ResourcePolicyService.has_resource_permission(GET_DASHBOARD)
def get_dashboard(uri: str) -> Dashboard:
with get_context().db_engine.scoped_session() as session:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def list_pipelines(*, filter: dict) -> dict:
)

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(GET_PIPELINE)
def get_pipeline(
uri: str,
Expand All @@ -202,6 +201,7 @@ def get_clone_url_http(uri: str):
return f'codecommit::{env.region}://{pipeline.repo}'

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(DELETE_PIPELINE)
def delete_pipeline(uri: str, deleteFromAWS: bool):
with _session() as session:
Expand Down Expand Up @@ -254,12 +254,14 @@ def _delete_repository(target_uri, accountid, cdk_role_arn, region, repo_name):
return True

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
def delete_pipeline_environment(envPipelineUri: str):
with _session() as session:
DatapipelinesRepository.delete_pipeline_environment(session=session, envPipelineUri=envPipelineUri)
return True

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_PIPELINE)
def get_credentials(uri):
with _session() as session:
Expand Down
7 changes: 1 addition & 6 deletions backend/dataall/modules/datasets_base/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,9 @@
gql.Field(name='imported', type=gql.Boolean),
gql.Field(
name='environment',
type=gql.Ref('Environment'),
type=gql.Ref('EnvironmentSimplified'),
resolver=get_dataset_environment,
),
gql.Field(
name='organization',
type=gql.Ref('Organization'),
resolver=get_dataset_organization,
),
gql.Field(
name='owners',
type=gql.String,
Expand Down
4 changes: 4 additions & 0 deletions backend/dataall/modules/mlstudio/services/mlstudio_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def create_sagemaker_studio_user(*, uri: str, admin_group: str, request: Sagemak
return sagemaker_studio_user

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
def update_sagemaker_studio_domain(environment, domain, data):
SagemakerStudioService._update_sagemaker_studio_domain_vpc(environment.AwsAccountId, environment.region, data)
domain.vpcType = data.get('vpcType')
Expand Down Expand Up @@ -205,6 +206,7 @@ def _update_sagemaker_studio_domain_vpc(account_id, region, data={}):
data['vpcType'] = 'created'

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
def create_sagemaker_studio_domain(session, environment, data: dict = {}):
SagemakerStudioService._update_sagemaker_studio_domain_vpc(environment.AwsAccountId, environment.region, data)

Expand Down Expand Up @@ -246,6 +248,7 @@ def get_sagemaker_studio_user_status(*, uri: str):
return status

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
@ResourcePolicyService.has_resource_permission(SGMSTUDIO_USER_URL)
def get_sagemaker_studio_user_presigned_url(*, uri: str):
with _session() as session:
Expand All @@ -259,6 +262,7 @@ def get_sagemaker_studio_user_applications(*, uri: str):
return sagemaker_studio_client(user).get_sagemaker_studio_user_applications()

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
@ResourcePolicyService.has_resource_permission(DELETE_SGMSTUDIO_USER)
def delete_sagemaker_studio_user(*, uri: str, delete_from_aws: bool):
"""Deletes SageMaker Studio user from the database and if delete_from_aws is True from AWS as well"""
Expand Down
Loading

0 comments on commit 9f087ff

Please sign in to comment.