Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/tableau): make sites.get_by_id call optional #12024

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,6 +68,7 @@
CapabilityReport,
MetadataWorkUnitProcessor,
Source,
StructuredLogLevel,
TestableSource,
TestConnectionReport,
)
Expand Down Expand Up @@ -700,18 +701,27 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
config=self.config,
ctx=self.ctx,
site=site,
site_id=site.id,
report=self.report,
server=self.server,
platform=self.platform,
)
logger.info(f"Ingesting assets of site '{site.content_url}'.")
yield from site_source.ingest_tableau_site()
else:
site = self.server.sites.get_by_id(self.server.site_id)
site = None
with self.report.report_exc(
title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.",
message="This usually indicates missing permissions. Ensure that you have all necessary permissions.",
level=StructuredLogLevel.WARN,
):
site = self.server.sites.get_by_id(self.server.site_id)

site_source = TableauSiteSource(
config=self.config,
ctx=self.ctx,
site=site,
site_id=self.server.site_id,
report=self.report,
server=self.server,
platform=self.platform,
Expand Down Expand Up @@ -743,7 +753,8 @@ def __init__(
self,
config: TableauConfig,
ctx: PipelineContext,
site: SiteItem,
site: Optional[SiteItem],
site_id: Optional[str],
report: TableauSourceReport,
server: Server,
platform: str,
Expand All @@ -752,9 +763,16 @@ def __init__(
self.report = report
self.server: Server = server
self.ctx: PipelineContext = ctx
self.site: SiteItem = site
self.platform = platform

self.site: Optional[SiteItem] = site
if site_id is not None:
self.site_id: str = site_id
else:
assert self.site is not None, "site or site_id is required"
assert self.site.id is not None, "site_id is required when site is provided"
self.site_id = self.site.id

self.database_tables: Dict[str, DatabaseTable] = {}
self.tableau_stat_registry: Dict[str, UsageStat] = {}
self.tableau_project_registry: Dict[str, TableauProject] = {}
Expand Down Expand Up @@ -808,7 +826,7 @@ def dataset_browse_prefix(self) -> str:
def _re_authenticate(self):
tableau_auth: Union[
TableauAuth, PersonalAccessTokenAuth
] = self.config.get_tableau_auth(self.site.content_url)
] = self.config.get_tableau_auth(self.site_id)
self.server.auth.sign_in(tableau_auth)

@property
Expand Down Expand Up @@ -3181,10 +3199,10 @@ def emit_project_in_topological_order(
else:
# This is a root Tableau project since the parent_project_id is None.
# For a root project, either the site is the parent, or the platform is the default parent.
if self.config.add_site_container and self.site and self.site.id:
if self.config.add_site_container:
# The site containers have already been generated by emit_site_container, so we
# don't need to emit them again here.
parent_project_key = self.gen_site_key(self.site.id)
parent_project_key = self.gen_site_key(self.site_id)

yield from gen_containers(
container_key=project_key,
Expand All @@ -3201,12 +3219,12 @@ def emit_project_in_topological_order(
yield from emit_project_in_topological_order(project)

def emit_site_container(self):
if not self.site or not self.site.id:
if not self.site:
logger.warning("Can not ingest site container. No site information found.")
return

yield from gen_containers(
container_key=self.gen_site_key(self.site.id),
container_key=self.gen_site_key(self.site_id),
name=self.site.name or "Default",
sub_types=[c.SITE],
)
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -1028,6 +1028,7 @@ def check_lineage_metadata(
ctx=context,
platform="tableau",
site=SiteItem(name="Site 1", content_url="site1"),
site_id="site1",
report=TableauSourceReport(),
server=Server("https://test-tableau-server.com"),
)
Expand Down Expand Up @@ -1248,6 +1249,7 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra
config=mock.MagicMock(),
ctx=mock.MagicMock(),
site=mock.MagicMock(),
site_id=None,
server=mock_sdk.return_value,
report=reporter,
)
Expand Down
Loading