diff --git a/datasets/broad_references/infra/provider.tf b/datasets/broad_references/infra/provider.tf
new file mode 100644
index 000000000..23ab87dcd
--- /dev/null
+++ b/datasets/broad_references/infra/provider.tf
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Google provider: project, region and the impersonated service account all come from input variables.
+provider "google" {
+  project                     = var.project_id
+  impersonate_service_account = var.impersonating_acct
+  region                      = var.region
+}
+
+data "google_client_openid_userinfo" "me" {}
+
+output "impersonating-account" {
+  value = data.google_client_openid_userinfo.me.email # email reported by the userinfo data source; presumably the impersonated account - confirm
+}
diff --git a/datasets/broad_references/infra/variables.tf b/datasets/broad_references/infra/variables.tf
new file mode 100644
index 000000000..c3ec7c506
--- /dev/null
+++ b/datasets/broad_references/infra/variables.tf
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+variable "project_id" {} # used as the google provider's project in provider.tf
+variable "bucket_name_prefix" {} # NOTE(review): not referenced by the infra files in this change - confirm where it is consumed
+variable "impersonating_acct" {} # passed to the provider's impersonate_service_account in provider.tf
+variable "region" {} # used as the google provider's region in provider.tf
+variable "env" {} # NOTE(review): not referenced by the infra files in this change - confirm where it is consumed
+
diff --git a/datasets/broad_references/pipelines/copy_gcs_bucket/copy_gcs_bucket_dag.py b/datasets/broad_references/pipelines/copy_gcs_bucket/copy_gcs_bucket_dag.py
new file mode 100644
index 000000000..24f4b043e
--- /dev/null
+++ b/datasets/broad_references/pipelines/copy_gcs_bucket/copy_gcs_bucket_dag.py
@@ -0,0 +1,48 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from airflow import DAG
+from airflow.providers.google.cloud.operators import cloud_storage_transfer_service
+
+default_args = {
+    "owner": "Google",
+    "depends_on_past": False,
+    "start_date": "2022-03-01",
+}
+
+
+with DAG(
+    dag_id="broad_references.copy_gcs_bucket",
+    default_args=default_args,
+    max_active_runs=1,
+    schedule_interval="@daily",
+    catchup=False,
+    default_view="graph",
+) as dag:
+    # This DAG holds a single task and is scheduled "@daily" with catchup disabled.
+    # Task to run a GCS to GCS operation using Google resources
+    gcs_bucket_transfer = cloud_storage_transfer_service.CloudDataTransferServiceGCSToGCSOperator(
+        task_id="gcs_bucket_transfer",
+        timeout=43200,  # 12 hours, expressed in seconds (same note as pipeline.yaml)
+        retries=0,
+        wait=True,
+        project_id="bigquery-public-data",
+        source_bucket="{{ var.json.broad_references.source_bucket }}",  # NOTE(review): Jinja template - expects a `broad_references` JSON Airflow Variable with a `source_bucket` key; confirm it exists
+        destination_bucket="gcp-public-data--broad-references",
+        google_impersonation_chain="{{ var.json.broad_references.service_account }}",  # NOTE(review): same Variable must also carry `service_account` - confirm
+        transfer_options={"deleteObjectsUniqueInSink": False},  # NOTE(review): by its name, objects found only in the destination are kept - confirm against the TransferOptions docs
+    )
+
+    gcs_bucket_transfer  # only task in the DAG, so there is nothing to chain it to
diff --git a/datasets/broad_references/pipelines/copy_gcs_bucket/pipeline.yaml b/datasets/broad_references/pipelines/copy_gcs_bucket/pipeline.yaml
new file mode 100644
index 000000000..8b9f0eda7
--- /dev/null
+++ b/datasets/broad_references/pipelines/copy_gcs_bucket/pipeline.yaml
@@ -0,0 +1,48 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+---
+resources: ~  # null: this pipeline declares no resources
+
+dag:
+  airflow_version: 2
+  initialize:
+    dag_id: copy_gcs_bucket  # the generated DAG file uses "broad_references.copy_gcs_bucket"
+    default_args:
+      owner: "Google"
+      depends_on_past: False
+      start_date: '2022-03-01'
+    max_active_runs: 1
+    schedule_interval: "@daily"
+    catchup: False
+    default_view: graph
+
+  tasks:
+    - operator: "CloudDataTransferServiceGCSToGCSOperator"
+      description: "Task to run a GCS to GCS operation using Google resources"
+      args:
+        task_id: "gcs_bucket_transfer"
+        timeout: 43200  # 12 hours
+        retries: 0
+        wait: True
+        project_id: bigquery-public-data
+        source_bucket: "{{ var.json.broad_references.source_bucket }}"  # NOTE(review): expects a `broad_references` JSON Airflow Variable with a `source_bucket` key - confirm
+        destination_bucket: "gcp-public-data--broad-references"
+        google_impersonation_chain: "{{ var.json.broad_references.service_account }}"  # NOTE(review): same Variable must also carry `service_account` - confirm
+        transfer_options:
+          deleteObjectsUniqueInSink: False  # NOTE(review): by its name, objects found only in the destination are kept - confirm against the TransferOptions docs
+
+  graph_paths:
+    - "gcs_bucket_transfer"  # single task, so the graph is just this one node
diff --git a/datasets/broad_references/pipelines/dataset.yaml b/datasets/broad_references/pipelines/dataset.yaml
new file mode 100644
index 000000000..0f9246eff
--- /dev/null
+++ b/datasets/broad_references/pipelines/dataset.yaml
@@ -0,0 +1,22 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dataset:
+  name: broad_references
+  friendly_name: ~  # NOTE(review): left null, as are the three fields below - confirm whether they should be filled in
+  description: ~
+  dataset_sources: ~
+  terms_of_use: ~
+
+resources: ~  # null: no dataset-level resources declared