diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index f9004b9ba9f86..d2d4f13ceb187 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -365,7 +365,7 @@ def metadata_read_capability_test( project_id=project_id, dataset_name=result[0].name, tables={}, - with_data_read_permission=config.is_profiling_enabled(), + with_data_read_permission=config.have_table_data_read_permission, ) if len(list(tables)) == 0: return CapabilityReport( @@ -1380,7 +1380,7 @@ def get_tables_for_dataset( project_id, dataset_name, items_to_get, - with_data_read_permission=self.config.is_profiling_enabled(), + with_data_read_permission=self.config.have_table_data_read_permission, ) items_to_get.clear() @@ -1389,7 +1389,7 @@ def get_tables_for_dataset( project_id, dataset_name, items_to_get, - with_data_read_permission=self.config.is_profiling_enabled(), + with_data_read_permission=self.config.have_table_data_read_permission, ) self.report.metadata_extraction_sec[f"{project_id}.{dataset_name}"] = round( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 2c7a53ef2bdca..578c9dddbd2e4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -184,6 +184,15 @@ class BigQueryV2Config( description="Number of partitioned table queried in batch when getting metadata. This is a low level config property which should be touched with care. 
This restriction is needed because we query partitions system view which throws error if we try to touch too many tables.",
     )
 
+    use_tables_list_query_v2: bool = Field(
+        default=False,
+        description="List tables using an improved query that extracts partitions and last modified timestamps more accurately. Requires the ability to read table data. Automatically enabled when profiling is enabled.",
+    )
+
+    @property
+    def have_table_data_read_permission(self) -> bool:  # Derived flag: True if the v2 list query is opted into or profiling is enabled.
+        return self.use_tables_list_query_v2 or self.is_profiling_enabled()  # Passed by callers as with_data_read_permission.
+
     column_limit: int = Field(
         default=300,
         description="Maximum number of columns to process in a table. This is a low level config property which should be touched with care. This restriction is needed because excessively wide tables can result in failure to ingest the schema.",