From 7ed3d58af6204f9bd4e11023cbd155f989ddadc6 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 24 May 2021 10:05:53 -0700 Subject: [PATCH 1/4] Update env --- metadata-ingestion/README.md | 4 +++- metadata-ingestion/src/datahub/ingestion/source/mongodb.py | 5 +++-- metadata-ingestion/src/datahub/ingestion/source/superset.py | 5 +++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index bf8504b9162ba..4d260d178a6cb 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -46,7 +46,7 @@ We use a plugin architecture so that you can install only the dependencies you a | redshift | `pip install 'acryl-datahub[redshift]'` | Redshift source | | sqlalchemy | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | | snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | -| superset | `pip install 'acryl-datahub[superset]'` | Superset source | +| superset | `pip install 'acryl-datahub[superset]'` | Superset source | | mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | | ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | | looker | `pip install 'acryl-datahub[looker]'` | Looker source | @@ -496,6 +496,7 @@ source: connect_uri: "mongodb://localhost" username: admin password: password + env: PROD authMechanism: "DEFAULT" options: {} database_pattern: {} @@ -640,6 +641,7 @@ source: ``` Current limitations: + - Currently works only for Debezium source connectors. ## Sinks diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 8e3eb0d2ea9fb..1ce68512d52bf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -41,6 +41,7 @@ # https://stackoverflow.com/a/48273736/5004662. DENY_DATABASE_LIST = set(["admin", "config", "local"]) +DEFAULT_ENV = "PROD" class MongoDBConfig(ConfigModel): # See the MongoDB authentication docs for details and examples. @@ -52,6 +53,7 @@ class MongoDBConfig(ConfigModel): options: dict = {} enableSchemaInference: bool = True schemaSamplingSize: Optional[PositiveInt] = 1000 + env: str = DEFAULT_ENV database_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() collection_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() @@ -392,7 +394,6 @@ def get_field_type( return SchemaFieldDataType(type=TypeClass()) def get_workunits(self) -> Iterable[MetadataWorkUnit]: - env = "PROD" platform = "mongodb" database_names: List[str] = self.mongo_client.list_database_names() @@ -417,7 +418,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: continue dataset_snapshot = DatasetSnapshot( - urn=f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{env})", + urn=f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{self.config.env})", aspects=[], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 30265adac4490..60c81e8107ea6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -74,6 +74,7 @@ def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str: "box_plot": ChartTypeClass.BAR, } +DEFAULT_ENV = "PROD" class SupersetConfig(ConfigModel): # See the Superset /security/login endpoint for details @@ -83,6 +84,7 @@ class SupersetConfig(ConfigModel): password: Optional[str] = None provider: str = "db" options: dict = {} + env: str = DEFAULT_ENV def get_metric_name(metric): @@ -110,7 +112,6 @@ def get_filter_name(filter_obj): class SupersetSource(Source): config: SupersetConfig report: SourceReport - env = "PROD" platform = "superset" def __hash__(self): @@ -181,7 +182,7 @@ def get_datasource_urn_from_id(self, datasource_id): f"urn:li:dataset:(" f"{platform_urn},{database_name + '.' if database_name else ''}" f"{schema_name + '.' if schema_name else ''}" - f"{table_name},{self.env})" + f"{table_name},{self.config.env})" ) return dataset_urn return None From 04e27b8384efba513a3f0e26fa213d9fd5f07953 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 24 May 2021 10:51:58 -0700 Subject: [PATCH 2/4] Update README with env --- metadata-ingestion/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 4d260d178a6cb..a0961019efc7d 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -340,6 +340,7 @@ source: password: pass provider: db | ldap connect_uri: http://localhost:8088 + env: "PROD" # Optional, default is "PROD" ``` See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. @@ -496,7 +497,7 @@ source: connect_uri: "mongodb://localhost" username: admin password: password - env: PROD + env: "PROD" # Optional, default is "PROD" authMechanism: "DEFAULT" options: {} database_pattern: {} From e8cf7a41908bbcf2a724299219c1354306af8c5d Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 24 May 2021 10:56:21 -0700 Subject: [PATCH 3/4] Black --- metadata-ingestion/src/datahub/ingestion/source/mongodb.py | 1 + metadata-ingestion/src/datahub/ingestion/source/superset.py | 1 + 2 files changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 1ce68512d52bf..3585d0d603a29 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -43,6 +43,7 @@ DEFAULT_ENV = "PROD" + class MongoDBConfig(ConfigModel): # See the MongoDB authentication docs for details and examples. # https://pymongo.readthedocs.io/en/stable/examples/authentication.html diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 60c81e8107ea6..be09328a0a958 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -76,6 +76,7 @@ def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str: DEFAULT_ENV = "PROD" + class SupersetConfig(ConfigModel): # See the Superset /security/login endpoint for details # https://superset.apache.org/docs/rest-api From 03fc6694afc41d10712d050726a5205b34625288 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Mon, 24 May 2021 11:58:26 -0700 Subject: [PATCH 4/4] Pin sqlalchemy version --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 76fbf23c4168d..fb662bc0efe52 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -54,7 +54,7 @@ def get_long_description(): sql_common = { # Required for all SQL sources. - "sqlalchemy>=1.3.24", + "sqlalchemy==1.3.24", } # Note: for all of these, framework_common will be added.