Support FileSetSpecType & Add Iceberg Table Test #6017

Merged
3 changes: 3 additions & 0 deletions .changelog/8550.txt
@@ -0,0 +1,3 @@
```release-note:enhancement
bigquery: added `external_data_configuration.file_set_spec_type` to `google_bigquery_table`
```
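
For reference, a minimal sketch of a configuration exercising the new field; the bucket, dataset, and object names are placeholders, not taken from this change:

```hcl
resource "google_bigquery_table" "example" {
  deletion_protection = false
  dataset_id          = "example_dataset"
  table_id            = "manifest_backed"

  external_data_configuration {
    autodetect    = true
    source_format = "PARQUET"
    # Treat each source URI as a newline-delimited manifest listing the data files.
    file_set_spec_type = "FILE_SET_SPEC_TYPE_NEW_LINE_DELIMITED_MANIFEST"
    source_uris        = ["gs://my-bucket/manifests/files.manifest.json"]
  }
}
```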
140 changes: 140 additions & 0 deletions google-beta/resource_bigquery_table_test.go
@@ -534,6 +534,48 @@ func TestAccBigQueryExternalDataTable_parquetOptions(t *testing.T) {
})
}

func TestAccBigQueryExternalDataTable_iceberg(t *testing.T) {
t.Parallel()

bucketName := testBucketName(t)

datasetID := fmt.Sprintf("tf_test_%s", acctest.RandString(t, 10))
tableID := fmt.Sprintf("tf_test_%s", acctest.RandString(t, 10))

acctest.VcrTest(t, resource.TestCase{
PreCheck: func() { acctest.AccTestPreCheck(t) },
ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t),
CheckDestroy: testAccCheckBigQueryTableDestroyProducer(t),
Steps: []resource.TestStep{
{
Config: testAccBigQueryTableFromGCSIceberg(datasetID, tableID, bucketName),
},
},
})
}

func TestAccBigQueryExternalDataTable_parquetFileSetSpecType(t *testing.T) {
t.Parallel()

bucketName := testBucketName(t)

datasetID := fmt.Sprintf("tf_test_%s", acctest.RandString(t, 10))
tableID := fmt.Sprintf("tf_test_%s", acctest.RandString(t, 10))
parquetFileName := "test.parquet"
manifestName := fmt.Sprintf("tf_test_%s.manifest.json", acctest.RandString(t, 10))

acctest.VcrTest(t, resource.TestCase{
PreCheck: func() { acctest.AccTestPreCheck(t) },
ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t),
CheckDestroy: testAccCheckBigQueryTableDestroyProducer(t),
Steps: []resource.TestStep{
{
Config: testAccBigQueryTableFromGCSParquetManifest(datasetID, tableID, bucketName, manifestName, parquetFileName),
},
},
})
}

func TestAccBigQueryExternalDataTable_queryAcceleration(t *testing.T) {
t.Parallel()

@@ -1769,6 +1811,104 @@ resource "google_bigquery_table" "test" {
`, datasetID, bucketName, objectName, tableID, enum, list)
}

func testAccBigQueryTableFromGCSIceberg(datasetID, tableID, bucketName string) string {
return fmt.Sprintf(`
resource "google_bigquery_dataset" "test" {
dataset_id = "%s"
}

resource "google_storage_bucket" "test" {
name = "%s"
location = "US"
force_destroy = true
uniform_bucket_level_access = true
}

# Set up an empty Iceberg table in the bucket:
# .
# ├── data
# └── metadata
#     └── 00000-1114da6b-bb88-4b5a-94bd-370f286c858a.metadata.json
# Upload the (empty) data folder placeholder.
resource "google_storage_bucket_object" "empty_data_folder" {
name = "data/"
content = " "
bucket = google_storage_bucket.test.name
}
# Upload the metadata file.
resource "google_storage_bucket_object" "metadata" {
name = "simple/metadata/00000-1114da6b-bb88-4b5a-94bd-370f286c858a.metadata.json"
source = "./test-fixtures/bigquerytable/simple/metadata/00000-1114da6b-bb88-4b5a-94bd-370f286c858a.metadata.json"
bucket = google_storage_bucket.test.name
}

resource "google_bigquery_table" "test" {
deletion_protection = false
table_id = "%s"
dataset_id = google_bigquery_dataset.test.dataset_id
external_data_configuration {
autodetect = false
source_format = "ICEBERG"
# Point to metadata.json.
source_uris = [
"gs://${google_storage_bucket.test.name}/simple/metadata/00000-1114da6b-bb88-4b5a-94bd-370f286c858a.metadata.json",
]
}
# Wait for the Iceberg table files to be uploaded.
depends_on = [
google_storage_bucket_object.empty_data_folder,
google_storage_bucket_object.metadata,
]
}
`, datasetID, bucketName, tableID)
}
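
An ICEBERG external table stays pinned to the metadata JSON it was created from, so picking up a newer snapshot means repointing `source_uris` at the newer metadata file. A hypothetical sketch of that pattern (the `metadata_file` variable is illustrative and not part of this PR):

```hcl
variable "metadata_file" {
  type = string
  # Bump this to the newest *.metadata.json to advance the table snapshot.
  default = "simple/metadata/00000-1114da6b-bb88-4b5a-94bd-370f286c858a.metadata.json"
}

resource "google_bigquery_table" "pinned" {
  deletion_protection = false
  table_id            = "iceberg_pinned"
  dataset_id          = google_bigquery_dataset.test.dataset_id
  external_data_configuration {
    autodetect    = false
    source_format = "ICEBERG"
    source_uris   = ["gs://${google_storage_bucket.test.name}/${var.metadata_file}"]
  }
}
```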

func testAccBigQueryTableFromGCSParquetManifest(datasetID, tableID, bucketName, manifestName, parquetFileName string) string {
return fmt.Sprintf(`
resource "google_bigquery_dataset" "test" {
dataset_id = "%s"
}

resource "google_storage_bucket" "test" {
name = "%s"
location = "US"
force_destroy = true
uniform_bucket_level_access = true
}

# Upload the data file.
resource "google_storage_bucket_object" "datafile" {
name = "%s"
source = "./test-fixtures/bigquerytable/simple/data/00000-0-4e4a11ad-368c-496b-97ae-e3ac28051a4d-00001.parquet"
bucket = google_storage_bucket.test.name
}

# Upload the manifest file (a newline-delimited list of data file URIs).
resource "google_storage_bucket_object" "manifest" {
name = "%s"
content = "gs://${google_storage_bucket.test.name}/${google_storage_bucket_object.datafile.name}"
bucket = google_storage_bucket.test.name
}

resource "google_bigquery_table" "test" {
deletion_protection = false
table_id = "%s"
dataset_id = google_bigquery_dataset.test.dataset_id
external_data_configuration {
autodetect = false
source_format = "PARQUET"
# Interpret each source URI as a newline-delimited manifest of data files.
file_set_spec_type = "FILE_SET_SPEC_TYPE_NEW_LINE_DELIMITED_MANIFEST"
# Point to the manifest object rather than the data files themselves.
source_uris = [
"gs://${google_storage_bucket.test.name}/${google_storage_bucket_object.manifest.name}",
]
}
}
`, datasetID, bucketName, parquetFileName, manifestName, tableID)
}
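
The manifest uploaded above is nothing more than newline-delimited `gs://` URIs, one data file per line. A sketch of a multi-file manifest built the same way (hypothetical, not part of this PR):

```hcl
resource "google_storage_bucket_object" "multi_manifest" {
  name   = "manifests/files.manifest.json"
  bucket = google_storage_bucket.test.name
  # One fully-qualified URI per line; BigQuery reads every file listed here.
  content = join("\n", [
    "gs://${google_storage_bucket.test.name}/data/part-00000.parquet",
    "gs://${google_storage_bucket.test.name}/data/part-00001.parquet",
  ])
}
```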

func testAccBigQueryTableFromGCSObjectTable(connectionID, datasetID, tableID, bucketName, objectName, maxStaleness string) string {
return fmt.Sprintf(`
resource "google_bigquery_connection" "test" {
14 changes: 14 additions & 0 deletions google-beta/services/bigquery/resource_bigquery_table.go
@@ -457,6 +457,12 @@ func ResourceBigQueryTable() *schema.Resource {
Description: `A list of the fully-qualified URIs that point to your data in Google Cloud.`,
Elem: &schema.Schema{Type: schema.TypeString},
},
// FileSetSpecType: [Optional] Specifies how source URIs are interpreted for constructing the file set to load. By default source URIs are expanded against the underlying storage. Other options include specifying manifest files. Only applicable to object storage systems.
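// The REST API currently documents two values: "FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH"
// (the default, expanding URIs via wildcard matching) and
// "FILE_SET_SPEC_TYPE_NEW_LINE_DELIMITED_MANIFEST" (each URI is a newline-delimited
// manifest of data files). No ValidateFunc is set, so values are checked server-side.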
"file_set_spec_type": {
Type: schema.TypeString,
Optional: true,
Description: `Specifies how source URIs are interpreted for constructing the file set to load. By default source URIs are expanded against the underlying storage. Other options include specifying manifest files. Only applicable to object storage systems.`,
},
// Compression: [Optional] The compression type of the data source.
"compression": {
Type: schema.TypeString,
@@ -1422,6 +1428,10 @@ func expandExternalDataConfiguration(cfg interface{}) (*bigquery.ExternalDataCon
edc.SourceUris = sourceUris
}

if v, ok := raw["file_set_spec_type"]; ok {
edc.FileSetSpecType = v.(string)
}
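// When the attribute is unset this assigns the empty string, which the client
// library's `omitempty` JSON tag keeps out of the request body.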

if v, ok := raw["compression"]; ok {
edc.Compression = v.(string)
}
@@ -1484,6 +1494,10 @@ func flattenExternalDataConfiguration(edc *bigquery.ExternalDataConfiguration) (
result["autodetect"] = edc.Autodetect
result["source_uris"] = edc.SourceUris

if edc.FileSetSpecType != "" {
result["file_set_spec_type"] = edc.FileSetSpecType
}

if edc.Compression != "" {
result["compression"] = edc.Compression
}
Binary file not shown: google-beta/test-fixtures/bigquerytable/simple/data/00000-0-4e4a11ad-368c-496b-97ae-e3ac28051a4d-00001.parquet
70 changes: 70 additions & 0 deletions google-beta/test-fixtures/bigquerytable/simple/metadata/00000-1114da6b-bb88-4b5a-94bd-370f286c858a.metadata.json
@@ -0,0 +1,70 @@
{
"format-version": 1,
"table-uuid": "20f6c20d-d535-4c0c-be82-cb25d31e9250",
"location": "/home/iceberg/warehouse/simple",
"last-updated-ms": 1686356696798,
"last-column-id": 2,
"schema": {
"type": "struct",
"schema-id": 0,
"fields": [
{
"id": 1,
"name": "one",
"required": false,
"type": "string"
},
{
"id": 2,
"name": "two",
"required": false,
"type": "long"
}
]
},
"current-schema-id": 0,
"schemas": [
{
"type": "struct",
"schema-id": 0,
"fields": [
{
"id": 1,
"name": "one",
"required": false,
"type": "string"
},
{
"id": 2,
"name": "two",
"required": false,
"type": "long"
}
]
}
],
"partition-spec": [],
"default-spec-id": 0,
"partition-specs": [
{
"spec-id": 0,
"fields": []
}
],
"last-partition-id": 999,
"default-sort-order-id": 0,
"sort-orders": [
{
"order-id": 0,
"fields": []
}
],
"properties": {
"owner": "root"
},
"current-snapshot-id": -1,
"refs": {},
"snapshots": [],
"snapshot-log": [],
"metadata-log": []
}
4 changes: 4 additions & 0 deletions website/docs/r/bigquery_table.html.markdown
@@ -210,6 +210,10 @@ in Terraform state, a `terraform destroy` or `terraform apply` that would delete
* `source_uris` - (Required) A list of the fully-qualified URIs that point to
your data in Google Cloud.

* `file_set_spec_type` - (Optional) Specifies how source URIs are interpreted for constructing the file set to load.
By default, source URIs are expanded against the underlying storage.
Other options include specifying manifest files. Only applicable to object storage systems. [Docs](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#filesetspectype)

* `reference_file_schema_uri` - (Optional) When creating an external table, the user can provide a reference file with the table schema. This is enabled for the following formats: AVRO, PARQUET, ORC.

* `metadata_cache_mode` - (Optional) Metadata Cache Mode for the table. Set this to enable caching of metadata from external data source. Valid values are `AUTOMATIC` and `MANUAL`.