Create DatasetLock for new datasets (#1090)
### Feature or Bugfix
- Bugfix

### Detail
- For new datasets, create a new dataset lock record.
- Otherwise, shares will time out and fail because they cannot acquire the lock (see the sketch below).
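
For context, here is a minimal sketch of the lock-acquisition pattern that breaks when the `DatasetLock` row is missing. It assumes SQLAlchemy sessions and the `DatasetLock` model from `dataall.modules.datasets_base.db.dataset_models`; the helper names, retry count, and delay are illustrative only, not the actual share-task implementation.

```python
import time

from dataall.modules.datasets_base.db.dataset_models import DatasetLock


def try_acquire_dataset_lock(session, dataset_uri: str, acquired_by: str) -> bool:
    """Return True if the lock row exists and was free, False otherwise."""
    lock = (
        session.query(DatasetLock)
        .filter(DatasetLock.datasetUri == dataset_uri)
        .with_for_update()  # hold a row-level lock while flipping the flag
        .first()
    )
    if lock is None or lock.isLocked:
        # No lock row (the bug fixed by this PR) or the lock is already held.
        return False
    lock.isLocked = True
    lock.acquiredBy = acquired_by
    session.commit()
    return True


def acquire_with_retries(session, dataset_uri: str, acquired_by: str,
                         retries: int = 10, delay_seconds: float = 5.0) -> bool:
    """Retry acquisition; with no DatasetLock row every attempt fails and the share times out."""
    for _ in range(retries):
        if try_acquire_dataset_lock(session, dataset_uri, acquired_by):
            return True
        time.sleep(delay_seconds)
    return False
```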

### Relates
- #1072 

### Security
Please answer the questions below briefly where applicable, or write `N/A`. Based on the [OWASP Top 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries? This includes fetching data from storage outside the application (e.g. a database, an S3 bucket).
  - Is the input sanitized?
  - What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Co-authored-by: dlpzx <[email protected]>
noah-paige and dlpzx authored Mar 7, 2024
1 parent a463374 commit 4a9cc1d
Showing 4 changed files with 33 additions and 4 deletions.
backend/dataall/modules/datasets/services/dataset_service.py (2 additions & 1 deletion)
@@ -120,6 +120,7 @@ def create_dataset(uri, admin_group, data: dict):
dataset=dataset,
data=data
)
DatasetRepository.create_dataset_lock(session=session, dataset=dataset)

DatasetBucketRepository.create_dataset_bucket(session, dataset, data)

@@ -439,7 +440,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False):
ResourcePolicy.delete_resource_policy(
session=session, resource_uri=uri, group=dataset.stewards
)

DatasetRepository.delete_dataset_lock(session=session, dataset=dataset)
DatasetRepository.delete_dataset(session, dataset)

if delete_from_aws:
backend/dataall/modules/datasets_base/db/dataset_repositories.py (21 additions & 1 deletion)
@@ -10,7 +10,7 @@
from dataall.base.db.exceptions import ObjectNotFound
from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language
from dataall.core.environment.services.environment_resource_manager import EnvironmentResource
from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset
from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetLock
from dataall.base.utils.naming_convention import (
NamingConventionService,
NamingConventionPattern,
@@ -152,6 +152,26 @@ def _set_dataset_aws_resources(dataset: Dataset, data, environment):
dataset.GlueDataQualityTriggerName = f"{glue_etl_basename}-dqtrigger"
return dataset

@staticmethod
def create_dataset_lock(session, dataset: Dataset):
dataset_lock = DatasetLock(
datasetUri=dataset.datasetUri,
isLocked=False,
acquiredBy=''
)
session.add(dataset_lock)
session.commit()

@staticmethod
def delete_dataset_lock(session, dataset: Dataset):
dataset_lock = (
session.query(DatasetLock)
.filter(DatasetLock.datasetUri == dataset.datasetUri)
.first()
)
session.delete(dataset_lock)
session.commit()

@staticmethod
def paginated_dataset_tables(session, uri, data=None) -> dict:
query = (
tests/modules/datasets/test_dataset.py (3 additions & 1 deletion)
@@ -7,7 +7,7 @@
from dataall.core.environment.db.environment_models import Environment
from dataall.core.organizations.db.organization_models import Organization
from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetLock
from tests.core.stacks.test_stack import update_stack_query

from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification
@@ -358,7 +358,9 @@ def test_dataset_in_environment(client, env_fixture, dataset1, group):


def test_delete_dataset(client, dataset, env_fixture, org_fixture, db, module_mocker, group, user):
# Delete any Dataset before effectuating the test
with db.scoped_session() as session:
session.query(DatasetLock).delete()
session.query(Dataset).delete()
session.commit()
deleted_dataset = dataset(
tests/modules/datasets/test_dataset_resource_found.py (7 additions & 1 deletion)
@@ -1,4 +1,4 @@
from dataall.modules.datasets_base.db.dataset_models import Dataset
from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetLock
from dataall.modules.datasets.services.dataset_permissions import CREATE_DATASET


@@ -118,6 +118,12 @@ def test_dataset_resource_found(db, client, env_fixture, org_fixture, group2, us

assert 'EnvironmentResourcesFound' in response.errors[0].message
with db.scoped_session() as session:
dataset_lock = (
session.query(DatasetLock)
.filter(DatasetLock.datasetUri == dataset.datasetUri)
.first()
)
session.delete(dataset_lock)
dataset = session.query(Dataset).get(dataset.datasetUri)
session.delete(dataset)
session.commit()
