Skip to content

Commit

Permalink
Add project_id to Bigquery::Table#extract and #extract_job
Browse files Browse the repository at this point in the history
  • Loading branch information
quartzmo committed Nov 27, 2018
1 parent f2fba83 commit 7d4af7f
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 21 deletions.
21 changes: 21 additions & 0 deletions google-cloud-bigquery/acceptance/bigquery/table_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -953,6 +953,27 @@
end
end

it "extracts read-only data to a url in your bucket with extract_job and project_id" do
  # The source table is looked up through a client bound to the public data
  # project, while the extract job itself is run under the test's own project.
  public_bigquery = Google::Cloud::Bigquery.new project_id: "bigquery-public-data"
  public_dataset = public_bigquery.dataset "samples"
  public_table = public_dataset.table "shakespeare"

  Tempfile.open "empty_extract_file.json" do |tmp|
    dest_file_name = random_file_destination_name
    extract_url = "gs://#{bucket.name}/#{dest_file_name}"

    extract_job = public_table.extract_job extract_url, project_id: bigquery.project_id # The user's project, not the public data project
    # Waiting once is sufficient; a second consecutive wait adds nothing.
    extract_job.wait_until_done!
    extract_job.wont_be :failed?
    extract_job.source.table_id.must_equal public_table.table_id

    # The extracted object must exist in the bucket and be non-empty.
    extract_file = bucket.file dest_file_name
    downloaded_file = extract_file.download tmp.path
    downloaded_file.size.must_be :>, 0
  end
end

it "extracts data to a url in your bucket with extract" do
# Make sure there is data to extract...
result = table.load local_file
Expand Down
15 changes: 10 additions & 5 deletions google-cloud-bigquery/lib/google/cloud/bigquery/extract_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,13 @@ def initialize gapi
# @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
# configuration object for setting query options.
def self.from_options service, table, storage_files, options = {}
job_ref = service.job_ref_from options[:job_id], options[:prefix]
job_ref = service.job_ref_from options[:job_id],
options[:prefix],
project_id: options[:project_id]
storage_urls = Array(storage_files).map do |url|
url.respond_to?(:to_gs_url) ? url.to_gs_url : url
end
dest_format = options[:format]
if dest_format.nil?
dest_format = Convert.derive_source_format storage_urls.first
end
dest_format = get_dest_format options[:format], storage_urls
req = Google::Apis::BigqueryV2::Job.new(
job_reference: job_ref,
configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
Expand All @@ -199,6 +198,12 @@ def self.from_options service, table, storage_files, options = {}
updater
end

##
# @private
# Picks the destination format for an extract job: the explicitly requested
# format when one is given, otherwise whatever Convert.derive_source_format
# reports for the first storage URL.
def self.get_dest_format dest_format, storage_urls
  return dest_format if dest_format

  Convert.derive_source_format storage_urls.first
end

##
# Sets the geographic location where the job should run. Required
# except for US and EU.
Expand Down
4 changes: 2 additions & 2 deletions google-cloud-bigquery/lib/google/cloud/bigquery/service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -356,11 +356,11 @@ def list_projects options = {}
# If no job_id or prefix is given, always generate a client-side job ID
# anyway, for idempotent retry in the google-api-client layer.
# See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
def job_ref_from job_id, prefix, location: nil
def job_ref_from job_id, prefix, location: nil, project_id: nil
prefix ||= "job_"
job_id ||= "#{prefix}#{generate_id}"
job_ref = API::JobReference.new(
project_id: @project,
project_id: (project_id || @project),
job_id: job_id
)
# BigQuery does not allow nil location, but missing is ok.
Expand Down
52 changes: 46 additions & 6 deletions google-cloud-bigquery/lib/google/cloud/bigquery/table.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,9 @@ def copy destination_table, create: nil, write: nil, &block
# dashes. International characters are allowed. Label values are
# optional. Label keys must start with a letter and each label in the
# list must have a different key.
# @param [String] project_id Identifier for an alternative BigQuery
# project to be used to run the job. Optional. If not present, the
# default project for the client is used.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
# configuration object for setting additional options.
Expand All @@ -1421,26 +1424,43 @@ def copy destination_table, create: nil, write: nil, &block
# table = dataset.table "my_table"
#
# extract_job = table.extract_job "gs://my-bucket/file-name.json",
# format: "json"
# format: "json"
#
# @example Extract from a read-only project using `project_id`:
# require "google/cloud/bigquery"
#
# readonly_project = "bigquery-public-data"
# bigquery = Google::Cloud::Bigquery.new project_id: readonly_project
# dataset = bigquery.dataset "samples"
# table = dataset.table "shakespeare"
#
# extract_job = table.extract_job "gs://my-bucket/shakespeare.csv",
# project_id: "my-project"
#
# @!group Data
#
def extract_job extract_url, format: nil, compression: nil,
delimiter: nil, header: nil, dryrun: nil, job_id: nil,
prefix: nil, labels: nil
prefix: nil, labels: nil, project_id: nil
ensure_service!
options = { format: format, compression: compression,
delimiter: delimiter, header: header, dryrun: dryrun,
job_id: job_id, prefix: prefix, labels: labels }
job_id: job_id, prefix: prefix, labels: labels,
project_id: project_id }
# Build the job configuration from the collected options (project_id is
# carried inside the options hash).
updater = ExtractJob::Updater.from_options service, table_ref,
extract_url, options
updater.location = location if location # may be table reference

# Give the caller a chance to adjust the configuration before submission.
yield updater if block_given?

job_gapi = updater.to_gapi
gapi = service.extract_table job_gapi
Job.from_gapi gapi, service
# When an alternative project is requested, submit through a copy of the
# service whose project is set to project_id, leaving this table's own
# service object unchanged; otherwise submit through the existing service.
if project_id
service2 = service.dup
service2.project = project_id
extract_table job_gapi, service2
else
extract_table job_gapi, service
end
end

##
Expand Down Expand Up @@ -1476,6 +1496,9 @@ def extract_job extract_url, format: nil, compression: nil,
# exported data. Default is <code>,</code>.
# @param [Boolean] header Whether to print out a header row in the
# results. Default is `true`.
# @param [String] project_id Identifier for an alternative BigQuery
# project to be used to run the job. Optional. If not present, the
# default project for the client is used.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
# configuration object for setting additional options.
Expand All @@ -1502,15 +1525,27 @@ def extract_job extract_url, format: nil, compression: nil,
# extract.labels = { "custom-label" => "custom-value" }
# end
#
# @example Extract from a read-only project using `project_id`:
# require "google/cloud/bigquery"
#
# readonly_project = "bigquery-public-data"
# bigquery = Google::Cloud::Bigquery.new project_id: readonly_project
# dataset = bigquery.dataset "samples"
# table = dataset.table "shakespeare"
#
# table.extract "gs://my-bucket/shakespeare.csv",
# project_id: "my-project"
#
# @!group Data
#
def extract extract_url, format: nil, compression: nil, delimiter: nil,
header: nil, &block
header: nil, project_id: nil, &block
job = extract_job extract_url,
format: format,
compression: compression,
delimiter: delimiter,
header: header,
project_id: project_id,
&block
job.wait_until_done!
ensure_job_succeeded! job
Expand Down Expand Up @@ -2485,6 +2520,11 @@ def get_table_ref table
end
end

# Submits the extract job request through the given service and wraps the
# response in a Job object that is bound to that same service.
def extract_table job_gapi, extract_service
  response = extract_service.extract_table(job_gapi)
  Job.from_gapi(response, extract_service)
end

##
# Yielded to a block to accumulate changes for a patch request.
class Updater < Table
Expand Down
16 changes: 16 additions & 0 deletions google-cloud-bigquery/support/doctest_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,22 @@ def mock_storage
end
end

# Mock setup for the Table#extract `project_id` example: one get_dataset, one
# get_table and one insert_job call are expected, and insert_job answers with
# query_job_gapi.
doctest.before "Google::Cloud::Bigquery::Table#extract@Extract from a read-only project using `project_id`:" do
  mock_bigquery do |service_mock|
    service_mock.expect(:get_dataset, dataset_full_gapi, ["my-project", "samples"])
    service_mock.expect(:get_table, table_full_gapi, ["my-project", "my_dataset", "shakespeare"])
    service_mock.expect(:insert_job, query_job_gapi, ["my-project", Google::Apis::BigqueryV2::Job])
  end
end

# Mock setup for the Table#extract_job `project_id` example: one get_dataset,
# one get_table and one insert_job call are expected, and insert_job answers
# with query_job_gapi.
doctest.before "Google::Cloud::Bigquery::Table#extract_job@Extract from a read-only project using `project_id`:" do
  mock_bigquery do |service_mock|
    service_mock.expect(:get_dataset, dataset_full_gapi, ["my-project", "samples"])
    service_mock.expect(:get_table, table_full_gapi, ["my-project", "my_dataset", "shakespeare"])
    service_mock.expect(:insert_job, query_job_gapi, ["my-project", Google::Apis::BigqueryV2::Job])
  end
end

doctest.before "Google::Cloud::Bigquery::Table#insert" do
mock_bigquery do |mock|
mock.expect :get_dataset, dataset_full_gapi, ["my-project", "my_dataset"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,21 @@
job.labels.must_equal labels
end

it "can extract itself and specify a different project for the job" do
  service_mock = Minitest::Mock.new
  bigquery.service.mocked_service = service_mock
  other_project = "other-project"
  expected_gapi = extract_job_gapi table, extract_file, project_id: other_project

  # The insert must be issued against the alternative project, carrying a job
  # whose reference names that same project.
  service_mock.expect :insert_job, expected_gapi, [other_project, expected_gapi]

  job = table.extract_job extract_url, project_id: other_project
  service_mock.verify

  job.must_be_kind_of Google::Cloud::Bigquery::ExtractJob
  job.project_id.must_equal other_project
end

# Borrowed from MockStorage, extract to a common module?

def random_bucket_hash name=random_bucket_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,30 @@
result.must_equal true
end

def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US"
Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location)
it "can extract itself and specify a different project for the job" do
  service_mock = Minitest::Mock.new
  bigquery.service.mocked_service = service_mock
  other_project = "other-project"
  request_gapi = extract_job_gapi table, extract_file, project_id: other_project
  # The stubbed response is a copy of the request job with its status set to
  # "done".
  response_gapi = request_gapi.dup
  response_gapi.status = status("done")

  # The insert must be issued against the alternative project.
  service_mock.expect :insert_job, response_gapi, [other_project, request_gapi]

  outcome = table.extract extract_url, project_id: other_project
  service_mock.verify

  outcome.must_equal true
end

# Builds an extract Job API object for tests by parsing the JSON produced by
# extract_job_json. When project_id is given it is forwarded so the job
# reference can name an alternative project.
def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US", project_id: nil
  job_json = extract_job_json(table, extract_file, job_id,
                              location: location, project_id: project_id)
  Google::Apis::BigqueryV2::Job.from_json(job_json)
end

def extract_job_json table, extract_file, job_id, location: "US"
def extract_job_json table, extract_file, job_id, location: "US", project_id: nil
{
"jobReference" => {
"projectId" => project,
"projectId" => (project_id || project),
"jobId" => job_id,
"location" => location
},
Expand Down
8 changes: 4 additions & 4 deletions google-cloud-bigquery/test/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -597,14 +597,14 @@ def query_job_json query, job_id: "job_9876543210", location: "US"
hash.to_json
end

def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US"
Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location)
# Builds an extract Job API object for tests by parsing the JSON produced by
# extract_job_json. When project_id is given it is forwarded so the job
# reference can name an alternative project.
def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US", project_id: nil
  job_json = extract_job_json(table, extract_file, job_id,
                              location: location, project_id: project_id)
  Google::Apis::BigqueryV2::Job.from_json(job_json)
end

def extract_job_json table, extract_file, job_id, location: "US"
def extract_job_json table, extract_file, job_id, location: "US", project_id: nil
hash = {
"jobReference" => {
"projectId" => project,
"projectId" => (project_id || project),
"jobId" => job_id
},
"configuration" => {
Expand Down

0 comments on commit 7d4af7f

Please sign in to comment.