diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py index 947f9b9271847..fcb4d63477f7b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py @@ -868,17 +868,29 @@ def generate_partition_profiler_query( partitioned table. See more about partitioned tables at https://cloud.google.com/bigquery/docs/partitioned-tables """ - + logger.debug( + f"generate partition profiler query for schema: {schema} and table {table}, partition_datetime: {partition_datetime}" + ) partition = self.get_latest_partition(schema, table) if partition: partition_where_clause: str logger.debug(f"{table} is partitioned and partition column is {partition}") - ( - partition_datetime, - upper_bound_partition_datetime, - ) = get_partition_range_from_partition_id( - partition.partition_id, partition_datetime - ) + try: + ( + partition_datetime, + upper_bound_partition_datetime, + ) = get_partition_range_from_partition_id( + partition.partition_id, partition_datetime + ) + except ValueError as e: + logger.error( + f"Unable to get partition range for partition id: {partition.partition_id} it failed with exception {e}" + ) + self.report.invalid_partition_ids[ + f"{schema}.{table}" + ] = partition.partition_id + return None, None + if partition.data_type in ("TIMESTAMP", "DATETIME"): partition_where_clause = "{column_name} BETWEEN '{partition_id}' AND '{upper_bound_partition_id}'".format( column_name=partition.column_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 034445bd853c5..6f41a3944ea06 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -1425,7 +1425,8 @@ def loop_profiler_requests( database=None, schema=schema, table=table ): self.report.report_warning( - "profile skipped as partitioned table empty", dataset_name + "profile skipped as partitioned table is empty or partition id was invalid", + dataset_name, ) continue diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py index e8577164640b9..066c0ee0c080d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py @@ -36,3 +36,4 @@ class BigQueryReport(SQLSourceReport): table_metadata: Dict[str, List[str]] = field(default_factory=dict) profile_table_selection_criteria: Dict[str, str] = field(default_factory=dict) selected_profile_tables: Dict[str, List[str]] = field(default_factory=dict) + invalid_partition_ids: Dict[str, str] = field(default_factory=dict)