From b7c3addacd3a63b4682393be5f1a4ac4b0a8434a Mon Sep 17 00:00:00 2001 From: Luc Tremsal Date: Thu, 7 Mar 2024 14:17:58 +0100 Subject: [PATCH 1/4] feat(ingest/superset): map awsathena platform name to athena --- metadata-ingestion/src/datahub/ingestion/source/superset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 18f8e3709a648e..316ead1c0072d4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -226,7 +226,10 @@ def get_platform_from_database_id(self, database_id): ).json() sqlalchemy_uri = database_response.get("result", {}).get("sqlalchemy_uri") if sqlalchemy_uri is None: - return database_response.get("result", {}).get("backend", "external") + platform_name = database_response.get("result", {}).get("backend", "external") + if platform_name == "awsathena": + return "athena" + return platform_name return get_platform_from_sqlalchemy_uri(sqlalchemy_uri) @lru_cache(maxsize=None) From 8cf65a032916d2c0e9d8bea25c17742b10792241 Mon Sep 17 00:00:00 2001 From: Luc Tremsal Date: Thu, 7 Mar 2024 15:30:04 +0100 Subject: [PATCH 2/4] feat(ingest/superset): replace awsathena with athena directly in the sqlalchemy uri --- .../src/datahub/ingestion/source/superset.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 316ead1c0072d4..8b7bccefaba04d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -1,6 +1,7 @@ import json import logging from functools import lru_cache +import re from typing import Dict, Iterable, List, Optional import dateutil.parser as dp @@ -226,11 +227,15 @@ def get_platform_from_database_id(self, database_id): ).json() sqlalchemy_uri = database_response.get("result", {}).get("sqlalchemy_uri") if sqlalchemy_uri is None: + # Changing awsathena to athena when calling the database backend platform_name = database_response.get("result", {}).get("backend", "external") if platform_name == "awsathena": return "athena" return platform_name - return get_platform_from_sqlalchemy_uri(sqlalchemy_uri) + # Changing awsathena to athena directly in the sqlachemy uri + pattern = re.compile(r"^(awsathena)\+") + changed_sqlalchemy_uri = re.sub(pattern, "athena+", sqlalchemy_uri) + return get_platform_from_sqlalchemy_uri(changed_sqlalchemy_uri) @lru_cache(maxsize=None) def get_datasource_urn_from_id(self, datasource_id): From 4da34dd60c9fa8cc14953552b2435918e2f89180 Mon Sep 17 00:00:00 2001 From: Luc Tremsal Date: Thu, 7 Mar 2024 15:39:59 +0100 Subject: [PATCH 3/4] ref(ingest/superset): change platform name awsathena to athena --- .../src/datahub/ingestion/source/superset.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 8b7bccefaba04d..8673569ab02b98 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -1,7 +1,6 @@ import json import logging from functools import lru_cache -import re from typing import Dict, Iterable, List, Optional import dateutil.parser as dp @@ -227,15 +226,12 @@ def get_platform_from_database_id(self, database_id): ).json() sqlalchemy_uri = database_response.get("result", {}).get("sqlalchemy_uri") if sqlalchemy_uri is None: - # Changing awsathena to athena when calling the database backend platform_name = database_response.get("result", {}).get("backend", "external") - if platform_name == "awsathena": - return "athena" - return platform_name - # Changing awsathena to athena directly in the sqlachemy uri - pattern = re.compile(r"^(awsathena)\+") - changed_sqlalchemy_uri = re.sub(pattern, "athena+", sqlalchemy_uri) - return get_platform_from_sqlalchemy_uri(changed_sqlalchemy_uri) + else: + platform_name = get_platform_from_sqlalchemy_uri(sqlalchemy_uri) + if platform_name == "awsathena": + return "athena" + return platform_name @lru_cache(maxsize=None) def get_datasource_urn_from_id(self, datasource_id): From ecfa7e73f16d2423c8ddd4b1d150b0af92a34565 Mon Sep 17 00:00:00 2001 From: Luc Tremsal Date: Fri, 8 Mar 2024 09:55:37 +0100 Subject: [PATCH 4/4] style: run black --- metadata-ingestion/src/datahub/ingestion/source/superset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 8673569ab02b98..1fbce27d0af240 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -226,7 +226,9 @@ def get_platform_from_database_id(self, database_id): ).json() sqlalchemy_uri = database_response.get("result", {}).get("sqlalchemy_uri") if sqlalchemy_uri is None: - platform_name = database_response.get("result", {}).get("backend", "external") + platform_name = database_response.get("result", {}).get( + "backend", "external" + ) else: platform_name = get_platform_from_sqlalchemy_uri(sqlalchemy_uri) if platform_name == "awsathena":