From d85391efba12771ff0cf2716c610c98b43bb4d0b Mon Sep 17 00:00:00 2001 From: David Sanchez Date: Wed, 13 Apr 2022 01:26:52 +0200 Subject: [PATCH] fix(tableau): avoid duplicate schema in URNs for upstream tables (#4645) * fix(tableau): avoid duplicate schema in URNs for upstream tables * Fix(lint) --- .../src/datahub/ingestion/source/tableau.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index ba242372dae02d..5379ab907b5792 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -263,21 +263,31 @@ def _create_upstream_table_lineage( elif table["name"] is None: continue + schema = table.get("schema", "") + table_name = table.get("name", "") + full_name = table.get("fullName", "") upstream_db = table.get("database", {}).get("name", "") logger.debug( "Processing Table with Connection Type: {0} and id {1}".format( table.get("connectionType", ""), table.get("id", "") ) ) - schema = self._get_schema( - table.get("schema", ""), upstream_db, table.get("fullName", "") - ) + schema = self._get_schema(schema, upstream_db, full_name) + # if the schema is included within the table name we omit it + if ( + schema + and table_name + and full_name + and table_name == full_name + and schema in table_name + ): + schema = "" table_urn = make_table_urn( self.config.env, upstream_db, table.get("connectionType", ""), schema, - table.get("name", ""), + table_name, ) upstream_table = UpstreamClass( @@ -285,7 +295,7 @@ def _create_upstream_table_lineage( type=DatasetLineageTypeClass.TRANSFORMED, ) upstream_tables.append(upstream_table) - table_path = f"{project.replace('/', REPLACE_SLASH_CHAR)}/{datasource.get('name', '')}/{table.get('name', '')}" + table_path = f"{project.replace('/', REPLACE_SLASH_CHAR)}/{datasource.get('name', '')}/{table_name}" self.upstream_tables[table_urn] = ( table.get("columns", []), table_path,