Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(bigquery): multi-project GCP setup run query through correct project #5393

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ def __init__(self, config, ctx):
def get_multiproject_project_id(
self, inspector: Optional[Inspector] = None, run_on_compute: bool = False
) -> Optional[str]:
"""
Use run_on_compute=True when running queries against a storage project
where you don't have job create rights; storage_project_id is then skipped
and config.project_id is used instead (when it is set).
"""
if self.config.storage_project_id and (not run_on_compute):
return self.config.storage_project_id
elif self.config.project_id:
Expand All @@ -353,6 +357,11 @@ def get_multiproject_project_id(
return None

def get_db_name(self, inspector: Inspector) -> str:
"""
DO NOT USE this to get project name when running queries.
That can cause problems with multi-project setups.
Use get_multiproject_project_id with run_on_compute = True
"""
db_name = self.get_multiproject_project_id(inspector)
# db name can't be empty here as we pass in inspector to get_multiproject_project_id
assert db_name
Expand Down Expand Up @@ -458,7 +467,7 @@ def generate_profile_candidates(
profile_clause = c if c == "" else f" WHERE {c}"[:-4]
if profile_clause == "":
return None
project_id = self.get_db_name(inspector)
project_id = self.get_multiproject_project_id(inspector, run_on_compute=True)
_client: BigQueryClient = BigQueryClient(project=project_id)
# Reading all tables' metadata to report
base_query = (
Expand Down