diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index f6a6b21f995c65..67044100c716c2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -126,6 +126,7 @@ def get_long_description(): "ldap", "looker", "glue", + "hive", "datahub-kafka", "datahub-rest", # airflow is added below diff --git a/metadata-ingestion/src/datahub/ingestion/source/hive.py b/metadata-ingestion/src/datahub/ingestion/source/hive.py index 7bc06bc64659fe..dc5f2a149d8528 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/hive.py @@ -22,6 +22,12 @@ class HiveConfig(BasicSQLAlchemyConfig): # defaults scheme = "hive" + def get_identifier(self, schema: str, table: str) -> str: + regular = f"{schema}.{table}" + if self.database: + return f"{self.database}.{regular}" + return regular + class HiveSource(SQLAlchemySource): def __init__(self, config, ctx): diff --git a/metadata-ingestion/tests/unit/test_hive_source.py b/metadata-ingestion/tests/unit/test_hive_source.py new file mode 100644 index 00000000000000..f3f08444f6a317 --- /dev/null +++ b/metadata-ingestion/tests/unit/test_hive_source.py @@ -0,0 +1,23 @@ +import unittest + +from datahub.ingestion.source.hive import HiveConfig + + +class HiveSinkTest(unittest.TestCase): + def test_hive_configuration_get_indentifier_with_database(self): + test_db_name = "test_database" + test_schema_name = "test_schema" + test_table_name = "test_table" + config_dict = { + "username": "test", + "password": "test", + "host_port": "test:80", + "database": test_db_name, + "scheme": "hive+https", + } + hive_config = HiveConfig.parse_obj(config_dict) + expected_output = f"{test_db_name}.{test_schema_name}.{test_table_name}" + output = hive_config.get_identifier( + schema=test_schema_name, table=test_table_name + ) + assert output == expected_output