Skip to content

Commit

Permalink
Skipping bigquery tables where we can't convert partition id to datetime and also report the error
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es committed Jul 13, 2022
1 parent 882e8cb commit 7763f32
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 8 deletions.
26 changes: 19 additions & 7 deletions metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,17 +868,29 @@ def generate_partition_profiler_query(
partitioned table.
See more about partitioned tables at https://cloud.google.com/bigquery/docs/partitioned-tables
"""

logger.debug(
f"generate partition profiler query for schema: {schema} and table {table}, partition_datetime: {partition_datetime}"
)
partition = self.get_latest_partition(schema, table)
if partition:
partition_where_clause: str
logger.debug(f"{table} is partitioned and partition column is {partition}")
(
partition_datetime,
upper_bound_partition_datetime,
) = get_partition_range_from_partition_id(
partition.partition_id, partition_datetime
)
try:
(
partition_datetime,
upper_bound_partition_datetime,
) = get_partition_range_from_partition_id(
partition.partition_id, partition_datetime
)
except ValueError as e:
logger.error(
f"Unable to get partition range for partition id: {partition.partition_id} it failed with exception {e}"
)
self.report.invalid_partition_ids[
f"{schema}.{table}"
] = partition.partition_id
return None, None

if partition.data_type in ("TIMESTAMP", "DATETIME"):
partition_where_clause = "{column_name} BETWEEN '{partition_id}' AND '{upper_bound_partition_id}'".format(
column_name=partition.column_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1425,7 +1425,8 @@ def loop_profiler_requests(
database=None, schema=schema, table=table
):
self.report.report_warning(
"profile skipped as partitioned table empty", dataset_name
"profile skipped as partitioned table is empty or partition id was invalid",
dataset_name,
)
continue

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ class BigQueryReport(SQLSourceReport):
table_metadata: Dict[str, List[str]] = field(default_factory=dict)
profile_table_selection_criteria: Dict[str, str] = field(default_factory=dict)
selected_profile_tables: Dict[str, List[str]] = field(default_factory=dict)
invalid_partition_ids: Dict[str, str] = field(default_factory=dict)

0 comments on commit 7763f32

Please sign in to comment.