Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat(tableau): review reporting and debug traces #12015

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,16 +289,12 @@ def make_tableau_client(self, site: str) -> Server:
server.auth.sign_in(authentication)
return server
except ServerResponseError as e:
message = f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
if isinstance(authentication, PersonalAccessTokenAuth):
# Docs on token expiry in Tableau:
# https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm#token-expiry
logger.info(
"Error authenticating with Tableau. Note that Tableau personal access tokens "
"expire if not used for 15 days or if over 1 year old"
)
raise ValueError(
f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
) from e
message = f"Error authenticating with Tableau. Note that Tableau personal access tokens expire if not used for 15 days or if over 1 year old: {str(e)}"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's keep the "(invalid/expired credentials or missing permissions)" part of the message here as well,

unless it doesn't apply to PersonalAccessTokenAuth?

raise ValueError(message) from e
except Exception as e:
raise ValueError(
f"Unable to login (check your Tableau connection and credentials): {str(e)}"
Expand Down Expand Up @@ -722,6 +718,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
title="Failed to Retrieve Tableau Metadata",
message="Unable to retrieve metadata from tableau.",
context=str(md_exception),
exc=md_exception,
)

def close(self) -> None:
Expand Down Expand Up @@ -826,6 +823,7 @@ def _populate_usage_stat_registry(self) -> None:
if not view.id:
continue
self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views)
logger.info(f"Got Tableau stats for {len(self.tableau_stat_registry)} assets")
logger.debug("Tableau stats %s", self.tableau_stat_registry)

def _populate_database_server_hostname_map(self) -> None:
Expand Down Expand Up @@ -876,7 +874,7 @@ def form_path(project_id: str) -> List[str]:
ancestors = [cur_proj.name]
while cur_proj.parent_id is not None:
if cur_proj.parent_id not in all_project_map:
self.report.report_warning(
self.report.warning(
"project-issue",
f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.",
)
Expand Down Expand Up @@ -974,8 +972,11 @@ def _init_datasource_registry(self) -> None:
self.datasource_project_map[ds.id] = ds.project_id
except Exception as e:
self.report.get_all_datasources_query_failed = True
logger.info(f"Get all datasources query failed due to error {e}")
logger.debug("Error stack trace", exc_info=True)
self.report.warning(
title="Unexpected Query Error",
message="Get all datasources query failed due to error",
exc=e,
)

def _init_workbook_registry(self) -> None:
if self.server is None:
Expand Down Expand Up @@ -1141,7 +1142,6 @@ def get_connection_object_page(
)

if node_limit_errors:
logger.debug(f"Node Limit Error. query_data {query_data}")
self.report.warning(
title="Tableau Data Exceed Predefined Limit",
message="The numbers of record in result set exceeds a predefined limit. Increase the tableau "
Expand Down Expand Up @@ -1257,9 +1257,10 @@ def emit_workbooks(self) -> Iterable[MetadataWorkUnit]:
wrk_id: Optional[str] = workbook.get(c.ID)
prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)

logger.debug(
f"Skipping workbook {wrk_name}({wrk_id}) as it is project {prj_name}({project_luid}) not "
f"present in project registry"
self.report.warning(
title="Skipping Missing Workbook",
message="Skipping workbook as its project is not present in project registry",
context=f"workbook={wrk_name}({wrk_id}), project={prj_name}({project_luid})",
)
continue

Expand Down Expand Up @@ -1453,7 +1454,7 @@ def get_upstream_tables(
c.COLUMNS_CONNECTION
].get("totalCount")
if not is_custom_sql and not num_tbl_cols:
logger.debug(
logger.warning(
f"Skipping upstream table with id {table[c.ID]}, no columns: {table}"
)
continue
Expand All @@ -1469,7 +1470,12 @@ def get_upstream_tables(
table, default_schema_map=self.config.default_schema_map
)
except Exception as e:
logger.info(f"Failed to generate upstream reference for {table}: {e}")
self.report.warning(
title="Potentially Missing Lineage Issue",
message="Failed to generate upstream reference",
exc=e,
context=f"table={table}",
)
continue

table_urn = ref.make_dataset_urn(
Expand Down Expand Up @@ -1917,10 +1923,12 @@ def _query_published_datasource_for_project_luid(self, ds_luid: str) -> None:
self.datasource_project_map[ds_result.id] = ds_result.project_id
except Exception as e:
self.report.num_get_datasource_query_failures += 1
logger.warning(
f"Failed to get datasource project_luid for {ds_luid} due to error {e}"
self.report.warning(
title="Unexpected Query Error",
message="Failed to get datasource details",
exc=e,
context=f"ds_luid={ds_luid}",
)
logger.debug("Error stack trace", exc_info=True)

def _get_workbook_project_luid(self, wb: dict) -> Optional[str]:
if wb.get(c.LUID) and self.workbook_project_map.get(wb[c.LUID]):
Expand Down
Loading