Add project_id to Bigquery::Table#extract and #extract_job #2692

Closed
wants to merge 1 commit into from
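In short, the change proposed here lets the extract job run (and be billed) under a different project from the one that owns the source table, which matters when the table lives in a read-only project such as bigquery-public-data. A minimal usage sketch, assuming the project_id option behaves as written in this PR; the bucket and project names are placeholders:

require "google/cloud/bigquery"

# Client scoped to the read-only public-data project that owns the table.
bigquery = Google::Cloud::Bigquery.new project_id: "bigquery-public-data"
table = bigquery.dataset("samples").table "shakespeare"

# Run the extract job under your own project rather than the table's project.
table.extract "gs://my-bucket/shakespeare.csv", project_id: "my-project"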
21 changes: 21 additions & 0 deletions google-cloud-bigquery/acceptance/bigquery/table_test.rb
@@ -953,6 +953,27 @@
end
end

it "extracts read-only data to a url in your bucket with extract_job and project_id" do
public_bigquery = Google::Cloud::Bigquery.new project_id: "bigquery-public-data"
public_dataset = public_bigquery.dataset "samples"
public_table = public_dataset.table "shakespeare"

Tempfile.open "empty_extract_file.json" do |tmp|
dest_file_name = random_file_destination_name
extract_url = "gs://#{bucket.name}/#{dest_file_name}"

extract_job = public_table.extract_job extract_url, project_id: bigquery.project_id # The user's project, not the public data project
extract_job.wait_until_done!
extract_job.wont_be :failed?
extract_job.source.table_id.must_equal public_table.table_id

extract_file = bucket.file dest_file_name
downloaded_file = extract_file.download tmp.path
downloaded_file.size.must_be :>, 0
end
end

it "extracts data to a url in your bucket with extract" do
# Make sure there is data to extract...
result = table.load local_file
15 changes: 10 additions & 5 deletions google-cloud-bigquery/lib/google/cloud/bigquery/extract_job.rb
@@ -171,14 +171,13 @@ def initialize gapi
# @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
# configuration object for setting query options.
def self.from_options service, table, storage_files, options = {}
job_ref = service.job_ref_from options[:job_id], options[:prefix]
job_ref = service.job_ref_from options[:job_id],
options[:prefix],
project_id: options[:project_id]
storage_urls = Array(storage_files).map do |url|
url.respond_to?(:to_gs_url) ? url.to_gs_url : url
end
dest_format = options[:format]
if dest_format.nil?
dest_format = Convert.derive_source_format storage_urls.first
end
dest_format = get_dest_format options[:format], storage_urls
req = Google::Apis::BigqueryV2::Job.new(
job_reference: job_ref,
configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
@@ -199,6 +198,12 @@ def self.from_options service, table, storage_files, options = {}
updater
end

##
# @private
def self.get_dest_format dest_format, storage_urls
dest_format || Convert.derive_source_format(storage_urls.first)
end

##
# Sets the geographic location where the job should run. Required
# except for US and EU.
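For reference, the extracted get_dest_format helper keeps the previous behavior: an explicit format option is passed through, otherwise the format is derived from the first destination URL. A small illustrative sketch, assuming Convert.derive_source_format maps a .json destination to NEWLINE_DELIMITED_JSON as it does for load sources:

# An explicit format wins.
Google::Cloud::Bigquery::ExtractJob.get_dest_format "CSV", ["gs://my-bucket/file-name.json"]
# => "CSV"

# With no explicit format, fall back to the extension of the first URL.
Google::Cloud::Bigquery::ExtractJob.get_dest_format nil, ["gs://my-bucket/file-name.json"]
# => "NEWLINE_DELIMITED_JSON" (assumed mapping)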
4 changes: 2 additions & 2 deletions google-cloud-bigquery/lib/google/cloud/bigquery/service.rb
@@ -356,11 +356,11 @@ def list_projects options = {}
# If no job_id or prefix is given, always generate a client-side job ID
# anyway, for idempotent retry in the google-api-client layer.
# See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
def job_ref_from job_id, prefix, location: nil
def job_ref_from job_id, prefix, location: nil, project_id: nil
prefix ||= "job_"
job_id ||= "#{prefix}#{generate_id}"
job_ref = API::JobReference.new(
project_id: @project,
project_id: (project_id || @project),
job_id: job_id
)
# BigQuery does not allow nil location, but missing is ok.
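With the new keyword, the internal job_ref_from helper can point the job reference at a project other than the client's default. A minimal sketch of the intended behavior, assuming a Service instance whose default project is "my-project":

# Default: the job reference targets the client's own project.
service.job_ref_from(nil, nil).project_id
# => "my-project"

# Override: the job reference (and therefore job ownership and billing)
# targets the project passed in project_id.
service.job_ref_from(nil, nil, project_id: "other-project").project_id
# => "other-project"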
52 changes: 46 additions & 6 deletions google-cloud-bigquery/lib/google/cloud/bigquery/table.rb
@@ -1407,6 +1407,9 @@ def copy destination_table, create: nil, write: nil, &block
# dashes. International characters are allowed. Label values are
# optional. Label keys must start with a letter and each label in the
# list must have a different key.
# @param [String] project_id Identifier for an alternative BigQuery
# project to be used to run the job. Optional. If not present, the
# default project for the client is used.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
# configuration object for setting additional options.
@@ -1421,26 +1424,43 @@ def copy destination_table, create: nil, write: nil, &block
# table = dataset.table "my_table"
#
# extract_job = table.extract_job "gs://my-bucket/file-name.json",
# format: "json"
# format: "json"
#
# @example Extract from a read-only project using `project_id`:
# require "google/cloud/bigquery"
#
# readonly_project = "bigquery-public-data"
# bigquery = Google::Cloud::Bigquery.new project_id: readonly_project
# dataset = bigquery.dataset "samples"
# table = dataset.table "shakespeare"
#
# extract_job = table.extract_job "gs://my-bucket/shakespeare.csv",
# project_id: "my-project"
#
# @!group Data
#
def extract_job extract_url, format: nil, compression: nil,
delimiter: nil, header: nil, dryrun: nil, job_id: nil,
prefix: nil, labels: nil
prefix: nil, labels: nil, project_id: nil
ensure_service!
options = { format: format, compression: compression,
delimiter: delimiter, header: header, dryrun: dryrun,
job_id: job_id, prefix: prefix, labels: labels }
job_id: job_id, prefix: prefix, labels: labels,
project_id: project_id }
updater = ExtractJob::Updater.from_options service, table_ref,
extract_url, options
updater.location = location if location # may be table reference

yield updater if block_given?

job_gapi = updater.to_gapi
gapi = service.extract_table job_gapi
Job.from_gapi gapi, service
if project_id
service2 = service.dup
service2.project = project_id
extract_table job_gapi, service2
else
extract_table job_gapi, service
end
end

##
Expand Down Expand Up @@ -1476,6 +1496,9 @@ def extract_job extract_url, format: nil, compression: nil,
# exported data. Default is <code>,</code>.
# @param [Boolean] header Whether to print out a header row in the
# results. Default is `true`.
# @param [String] project_id Identifier for an alternative BigQuery
# project to be used to run the job. Optional. If not present, the
# default project for the client is used.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
# configuration object for setting additional options.
@@ -1502,15 +1525,27 @@ def extract_job extract_url, format: nil, compression: nil,
# extract.labels = { "custom-label" => "custom-value" }
# end
#
# @example Extract from a read-only project using `project_id`:
# require "google/cloud/bigquery"
#
# readonly_project = "bigquery-public-data"
# bigquery = Google::Cloud::Bigquery.new project_id: readonly_project
# dataset = bigquery.dataset "samples"
# table = dataset.table "shakespeare"
#
# table.extract "gs://my-bucket/shakespeare.csv",
# project_id: "my-project"
#
# @!group Data
#
def extract extract_url, format: nil, compression: nil, delimiter: nil,
header: nil, &block
header: nil, project_id: nil, &block
job = extract_job extract_url,
format: format,
compression: compression,
delimiter: delimiter,
header: header,
project_id: project_id,
&block
job.wait_until_done!
ensure_job_succeeded! job
@@ -2485,6 +2520,11 @@ def get_table_ref table
end
end

def extract_table job_gapi, extract_service
gapi = extract_service.extract_table job_gapi
Job.from_gapi gapi, extract_service
end

##
# Yielded to a block to accumulate changes for a patch request.
class Updater < Table
16 changes: 16 additions & 0 deletions google-cloud-bigquery/support/doctest_helper.rb
@@ -705,6 +705,22 @@ def mock_storage
end
end

doctest.before "Google::Cloud::Bigquery::Table#extract@Extract from a read-only project using `project_id`:" do
mock_bigquery do |mock|
mock.expect :get_dataset, dataset_full_gapi, ["my-project", "samples"]
mock.expect :get_table, table_full_gapi, ["my-project", "my_dataset", "shakespeare"]
mock.expect :insert_job, query_job_gapi, ["my-project", Google::Apis::BigqueryV2::Job]
end
end

doctest.before "Google::Cloud::Bigquery::Table#extract_job@Extract from a read-only project using `project_id`:" do
mock_bigquery do |mock|
mock.expect :get_dataset, dataset_full_gapi, ["my-project", "samples"]
mock.expect :get_table, table_full_gapi, ["my-project", "my_dataset", "shakespeare"]
mock.expect :insert_job, query_job_gapi, ["my-project", Google::Apis::BigqueryV2::Job]
end
end

doctest.before "Google::Cloud::Bigquery::Table#insert" do
mock_bigquery do |mock|
mock.expect :get_dataset, dataset_full_gapi, ["my-project", "my_dataset"]
@@ -244,6 +244,21 @@
job.labels.must_equal labels
end

it "can extract itself and specify a different project for the job" do
mock = Minitest::Mock.new
bigquery.service.mocked_service = mock
project_id_2 = "other-project"
job_gapi = extract_job_gapi table, extract_file, project_id: project_id_2

mock.expect :insert_job, job_gapi, [project_id_2, job_gapi]

job = table.extract_job extract_url, project_id: project_id_2
mock.verify

job.must_be_kind_of Google::Cloud::Bigquery::ExtractJob
job.project_id.must_equal project_id_2
end

# Borrowed from MockStorage, extract to a common module?

def random_bucket_hash name=random_bucket_name
@@ -184,14 +184,30 @@
result.must_equal true
end

def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US"
Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location)
it "can extract itself and specify a different project for the job" do
mock = Minitest::Mock.new
bigquery.service.mocked_service = mock
project_id_2 = "other-project"
job_gapi = extract_job_gapi table, extract_file, project_id: project_id_2
job_resp_gapi = job_gapi.dup
job_resp_gapi.status = status "done"

mock.expect :insert_job, job_resp_gapi, [project_id_2, job_gapi]

result = table.extract extract_url, project_id: project_id_2
mock.verify

result.must_equal true
end

def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US", project_id: nil
Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location, project_id: project_id)
end

def extract_job_json table, extract_file, job_id, location: "US"
def extract_job_json table, extract_file, job_id, location: "US", project_id: nil
{
"jobReference" => {
"projectId" => project,
"projectId" => (project_id || project),
"jobId" => job_id,
"location" => location
},
8 changes: 4 additions & 4 deletions google-cloud-bigquery/test/helper.rb
@@ -597,14 +597,14 @@ def query_job_json query, job_id: "job_9876543210", location: "US"
hash.to_json
end

def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US"
Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location)
def extract_job_gapi table, extract_file, job_id: "job_9876543210", location: "US", project_id: nil
Google::Apis::BigqueryV2::Job.from_json extract_job_json(table, extract_file, job_id, location: location, project_id: project_id)
end

def extract_job_json table, extract_file, job_id, location: "US"
def extract_job_json table, extract_file, job_id, location: "US", project_id: nil
hash = {
"jobReference" => {
"projectId" => project,
"projectId" => (project_id || project),
"jobId" => job_id
},
"configuration" => {