Add project_id to Bigquery::Table#extract and #extract_job

[fixes googleapis#2609]
quartzmo · Nov 27, 2018 · 7d4af7f · 7d4af7f
1 parent f2fba83
commit 7d4af7f
Show file tree

Hide file tree

Showing 8 changed files with 134 additions and 21 deletions.
diff --git a/google-cloud-bigquery/acceptance/bigquery/table_test.rb b/google-cloud-bigquery/acceptance/bigquery/table_test.rb
@@ -953,6 +953,27 @@
     end
   end
 
+  it "extracts read-only data to a url in your bucket with extract_job and project_id" do
+    public_bigquery = Google::Cloud::Bigquery.new project_id: "bigquery-public-data"
+    public_dataset = public_bigquery.dataset "samples"
+    public_table = public_dataset.table "shakespeare"
+
+    Tempfile.open "empty_extract_file.json" do |tmp|
+      dest_file_name = random_file_destination_name
+      extract_url = "gs://#{bucket.name}/#{dest_file_name}"
+
+      extract_job = public_table.extract_job extract_url, project_id: bigquery.project_id # The user's project, not the public data project
+      extract_job.wait_until_done!
+      extract_job.wait_until_done!
+      extract_job.wont_be :failed?
+      extract_job.source.table_id.must_equal public_table.table_id
+
+      extract_file = bucket.file dest_file_name
+      downloaded_file = extract_file.download tmp.path
+      downloaded_file.size.must_be :>, 0
+    end
+  end
+
   it "extracts data to a url in your bucket with extract" do
     # Make sure there is data to extract...
     result = table.load local_file

diff --git a/google-cloud-bigquery/lib/google/cloud/bigquery/extract_job.rb b/google-cloud-bigquery/lib/google/cloud/bigquery/extract_job.rb
@@ -171,14 +171,13 @@ def initialize gapi
           # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
           #   configuration object for setting query options.
           def self.from_options service, table, storage_files, options = {}
-            job_ref = service.job_ref_from options[:job_id], options[:prefix]
+            job_ref = service.job_ref_from options[:job_id],
+                                           options[:prefix],
+                                           project_id: options[:project_id]
             storage_urls = Array(storage_files).map do |url|
               url.respond_to?(:to_gs_url) ? url.to_gs_url : url
             end
-            dest_format = options[:format]
-            if dest_format.nil?
-              dest_format = Convert.derive_source_format storage_urls.first
-            end
+            dest_format = get_dest_format options[:format], storage_urls
             req = Google::Apis::BigqueryV2::Job.new(
               job_reference: job_ref,
               configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
@@ -199,6 +198,12 @@ def self.from_options service, table, storage_files, options = {}
             updater
           end
 
+          ##
+          # @private
+          def self.get_dest_format dest_format, storage_urls
+            dest_format || Convert.derive_source_format(storage_urls.first)
+          end
+
           ##
           # Sets the geographic location where the job should run. Required
           # except for US and EU.

diff --git a/google-cloud-bigquery/lib/google/cloud/bigquery/service.rb b/google-cloud-bigquery/lib/google/cloud/bigquery/service.rb
@@ -356,11 +356,11 @@ def list_projects options = {}
         # If no job_id or prefix is given, always generate a client-side job ID
         # anyway, for idempotent retry in the google-api-client layer.
         # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
-        def job_ref_from job_id, prefix, location: nil
+        def job_ref_from job_id, prefix, location: nil, project_id: nil
           prefix ||= "job_"
           job_id ||= "#{prefix}#{generate_id}"
           job_ref = API::JobReference.new(
-            project_id: @project,
+            project_id: (project_id || @project),
             job_id: job_id
           )
           # BigQuery does not allow nil location, but missing is ok.

diff --git a/google-cloud-bigquery/lib/google/cloud/bigquery/table.rb b/google-cloud-bigquery/lib/google/cloud/bigquery/table.rb
@@ -1407,6 +1407,9 @@ def copy destination_table, create: nil, write: nil, &block
         #   dashes. International characters are allowed. Label values are
         #   optional. Label keys must start with a letter and each label in the
         #   list must have a different key.
+        # @param [String] project_id Identifier for an alternative BigQuery
+        #   project to be used to run the job. Optional. If not present, the
+        #   default project for the client is used.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
         #   configuration object for setting additional options.
@@ -1421,26 +1424,43 @@ def copy destination_table, create: nil, write: nil, &block
         #   table = dataset.table "my_table"
         #
         #   extract_job = table.extract_job "gs://my-bucket/file-name.json",
-        #                               format: "json"
+        #                                   format: "json"
+        #
+        # @example Extract from a read-only project using `project_id`:
+        #   require "google/cloud/bigquery"
+        #
+        #   readonly_project = "bigquery-public-data"
+        #   bigquery = Google::Cloud::Bigquery.new project_id: readonly_project
+        #   dataset = bigquery.dataset "samples"
+        #   table = dataset.table "shakespeare"
+        #
+        #   extract_job = table.extract_job "gs://my-bucket/shakespeare.csv",
+        #                                   project_id: "my-project"
         #
         # @!group Data
         #
         def extract_job extract_url, format: nil, compression: nil,
                         delimiter: nil, header: nil, dryrun: nil, job_id: nil,
-                        prefix: nil, labels: nil
+                        prefix: nil, labels: nil, project_id: nil
           ensure_service!
           options = { format: format, compression: compression,
                       delimiter: delimiter, header: header, dryrun: dryrun,
-                      job_id: job_id, prefix: prefix, labels: labels }
+                      job_id: job_id, prefix: prefix, labels: labels,
+                      project_id: project_id }
           updater = ExtractJob::Updater.from_options service, table_ref,
                                                      extract_url, options
           updater.location = location if location # may be table reference
 
           yield updater if block_given?
 
           job_gapi = updater.to_gapi
-          gapi = service.extract_table job_gapi
-          Job.from_gapi gapi, service
+          if project_id
+            service2 = service.dup
+            service2.project = project_id
+            extract_table job_gapi, service2
+          else
+            extract_table job_gapi, service
+          end
         end
 
         ##
@@ -1476,6 +1496,9 @@ def extract_job extract_url, format: nil, compression: nil,
         #   exported data. Default is <code>,</code>.
         # @param [Boolean] header Whether to print out a header row in the
         #   results. Default is `true`.
+        # @param [String] project_id Identifier for an alternative BigQuery
+        #   project to be used to run the job. Optional. If not present, the
+        #   default project for the client is used.
         # @yield [job] a job configuration object
         # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
         #   configuration object for setting additional options.
@@ -1502,15 +1525,27 @@ def extract_job extract_url, format: nil, compression: nil,
         #     extract.labels = { "custom-label" => "custom-value" }
         #   end
         #
+        # @example Extract from a read-only project using `project_id`:
+        #   require "google/cloud/bigquery"
+        #
+        #   readonly_project = "bigquery-public-data"
+        #   bigquery = Google::Cloud::Bigquery.new project_id: readonly_project
+        #   dataset = bigquery.dataset "samples"
+        #   table = dataset.table "shakespeare"
+        #
+        #   table.extract "gs://my-bucket/shakespeare.csv",
+        #                 project_id: "my-project"
+        #
         # @!group Data
         #
         def extract extract_url, format: nil, compression: nil, delimiter: nil,
-                    header: nil, &block
+                    header: nil, project_id: nil, &block
           job = extract_job extract_url,
                             format: format,
                             compression: compression,
                             delimiter: delimiter,
                             header: header,
+                            project_id: project_id,
                             &block
           job.wait_until_done!
           ensure_job_succeeded! job
@@ -2485,6 +2520,11 @@ def get_table_ref table
           end
         end
 
+        def extract_table job_gapi, extract_service
+          gapi = extract_service.extract_table job_gapi
+          Job.from_gapi gapi, extract_service
+        end
+
         ##
         # Yielded to a block to accumulate changes for a patch request.
         class Updater < Table

diff --git a/google-cloud-bigquery/support/doctest_helper.rb b/google-cloud-bigquery/support/doctest_helper.rb
@@ -705,6 +705,22 @@ def mock_storage
     end
   end
 
+  doctest.before "Google::Cloud::Bigquery::Table#extract@Extract from a read-only project using `project_id`:" do
+    mock_bigquery do |mock| # expectations for the `project_id` example: dataset lookup, table lookup, job insert
+      mock.expect :get_dataset, dataset_full_gapi, ["my-project", "samples"]
+      mock.expect :get_table, table_full_gapi, ["my-project", "my_dataset", "shakespeare"] # NOTE(review): "my_dataset" rather than "samples" - presumably the id carried by dataset_full_gapi; confirm
+      mock.expect :insert_job, query_job_gapi, ["my-project", Google::Apis::BigqueryV2::Job] # NOTE(review): a query job fixture answers the extract insert - confirm intended
+    end
+  end
+
+  doctest.before "Google::Cloud::Bigquery::Table#extract_job@Extract from a read-only project using `project_id`:" do
+    mock_bigquery do |mock| # expectations for the `project_id` example: dataset lookup, table lookup, job insert
+      mock.expect :get_dataset, dataset_full_gapi, ["my-project", "samples"]
+      mock.expect :get_table, table_full_gapi, ["my-project", "my_dataset", "shakespeare"] # NOTE(review): "my_dataset" rather than "samples" - presumably the id carried by dataset_full_gapi; confirm
+      mock.expect :insert_job, query_job_gapi, ["my-project", Google::Apis::BigqueryV2::Job] # NOTE(review): a query job fixture answers the extract insert - confirm intended
+    end
+  end
+
   doctest.before "Google::Cloud::Bigquery::Table#insert" do
     mock_bigquery do |mock|
       mock.expect :get_dataset, dataset_full_gapi, ["my-project", "my_dataset"]

diff --git a/google-cloud-bigquery/test/google/cloud/bigquery/table_extract_job_test.rb b/google-cloud-bigquery/test/google/cloud/bigquery/table_extract_job_test.rb
@@ -244,6 +244,21 @@
     job.labels.must_equal labels
   end
 
+  it "can extract itself and specify a different project for the job" do
+    mock = Minitest::Mock.new
+    bigquery.service.mocked_service = mock
+    project_id_2 = "other-project"
+    job_gapi = extract_job_gapi table, extract_file, project_id: project_id_2
+
+    mock.expect :insert_job, job_gapi, [project_id_2, job_gapi]
+
+    job = table.extract_job extract_url, project_id: project_id_2
+    mock.verify
+
+    job.must_be_kind_of Google::Cloud::Bigquery::ExtractJob
+    job.project_id.must_equal project_id_2
+  end
+
   # Borrowed from MockStorage, extract to a common module?
 
   def random_bucket_hash name=random_bucket_name

diff --git a/google-cloud-bigquery/test/google/cloud/bigquery/table_extract_test.rb b/google-cloud-bigquery/test/google/cloud/bigquery/table_extract_test.rb
@@ -184,14 +184,30 @@
     result.must_equal true
   end
 
-  def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US"
-    Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location)
+  it "can extract itself and specify a different project for the job" do
+    mock = Minitest::Mock.new
+    bigquery.service.mocked_service = mock
+    project_id_2 = "other-project"
+    job_gapi = extract_job_gapi table, extract_file, project_id: project_id_2
+    job_resp_gapi = job_gapi.dup
+    job_resp_gapi.status = status "done"
+
+    mock.expect :insert_job, job_resp_gapi, [project_id_2, job_gapi]
+
+    result = table.extract extract_url, project_id: project_id_2
+    mock.verify
+
+    result.must_equal true
+  end
+
+  def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US", project_id: nil
+    Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location, project_id: project_id)
   end
 
-  def extract_job_json table, extract_file, job_id, location: "US"
+  def extract_job_json table, extract_file, job_id, location: "US", project_id: nil
     {
       "jobReference" => {
-        "projectId" => project,
+        "projectId" => (project_id || project),
         "jobId" => job_id,
         "location" => location
       },

diff --git a/google-cloud-bigquery/test/helper.rb b/google-cloud-bigquery/test/helper.rb
@@ -597,14 +597,14 @@ def query_job_json query, job_id: "job_9876543210", location: "US"
     hash.to_json
   end
 
-  def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US"
-    Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location)
+  def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US", project_id: nil
+    Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location, project_id: project_id)
   end
 
-  def extract_job_json table, extract_file, job_id, location: "US"
+  def extract_job_json table, extract_file, job_id, location: "US", project_id: nil
     hash = {
       "jobReference" => {
-        "projectId" => project,
+        "projectId" => (project_id || project),
         "jobId" => job_id
       },
       "configuration" => {