diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index bf8504b9162ba..a0961019efc7d 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -46,7 +46,7 @@ We use a plugin architecture so that you can install only the dependencies you a | redshift | `pip install 'acryl-datahub[redshift]'` | Redshift source | | sqlalchemy | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source | | snowflake | `pip install 'acryl-datahub[snowflake]'` | Snowflake source | -| superset | `pip install 'acryl-datahub[superset]'` | Superset source | +| superset | `pip install 'acryl-datahub[superset]'` | Superset source | | mongodb | `pip install 'acryl-datahub[mongodb]'` | MongoDB source | | ldap | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source | | looker | `pip install 'acryl-datahub[looker]'` | Looker source | @@ -340,6 +340,7 @@ source: password: pass provider: db | ldap connect_uri: http://localhost:8088 + env: "PROD" # Optional, default is "PROD" ``` See documentation for superset's `/security/login` at https://superset.apache.org/docs/rest-api for more details on superset's login api. @@ -496,6 +497,7 @@ source: connect_uri: "mongodb://localhost" username: admin password: password + env: "PROD" # Optional, default is "PROD" authMechanism: "DEFAULT" options: {} database_pattern: {} @@ -640,6 +642,7 @@ source: ``` Current limitations: + - Currently works only for Debezium source connectors. ## Sinks diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 76fbf23c4168d..fb662bc0efe52 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -54,7 +54,7 @@ def get_long_description(): sql_common = { # Required for all SQL sources. - "sqlalchemy>=1.3.24", + "sqlalchemy==1.3.24", } # Note: for all of these, framework_common will be added. diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 8e3eb0d2ea9fb..3585d0d603a29 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -41,6 +41,8 @@ # https://stackoverflow.com/a/48273736/5004662. DENY_DATABASE_LIST = set(["admin", "config", "local"]) +DEFAULT_ENV = "PROD" + class MongoDBConfig(ConfigModel): # See the MongoDB authentication docs for details and examples. @@ -52,6 +54,7 @@ class MongoDBConfig(ConfigModel): options: dict = {} enableSchemaInference: bool = True schemaSamplingSize: Optional[PositiveInt] = 1000 + env: str = DEFAULT_ENV database_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() collection_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() @@ -392,7 +395,6 @@ def get_field_type( return SchemaFieldDataType(type=TypeClass()) def get_workunits(self) -> Iterable[MetadataWorkUnit]: - env = "PROD" platform = "mongodb" database_names: List[str] = self.mongo_client.list_database_names() @@ -417,7 +419,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: continue dataset_snapshot = DatasetSnapshot( - urn=f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{env})", + urn=f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{self.config.env})", aspects=[], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 30265adac4490..be09328a0a958 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -74,6 +74,8 @@ def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str: "box_plot": ChartTypeClass.BAR, } +DEFAULT_ENV = "PROD" + class SupersetConfig(ConfigModel): # See the Superset /security/login endpoint for details @@ -83,6 +85,7 @@ class SupersetConfig(ConfigModel): password: Optional[str] = None provider: str = "db" options: dict = {} + env: str = DEFAULT_ENV def get_metric_name(metric): @@ -110,7 +113,6 @@ def get_filter_name(filter_obj): class SupersetSource(Source): config: SupersetConfig report: SourceReport - env = "PROD" platform = "superset" def __hash__(self): @@ -181,7 +183,7 @@ def get_datasource_urn_from_id(self, datasource_id): f"urn:li:dataset:(" f"{platform_urn},{database_name + '.' if database_name else ''}" f"{schema_name + '.' if schema_name else ''}" - f"{table_name},{self.env})" + f"{table_name},{self.config.env})" ) return dataset_urn return None