Skip to content

Commit

Permalink
support format in options of COPY command (apache#9744)
Browse files Browse the repository at this point in the history
* support format in options of COPY command

* fix clippy lint error

* add test case to verify priority between STORED AS and OPTIONS (format <>)
  • Loading branch information
tinfoil-knight authored Mar 23, 2024
1 parent 02fd450 commit 40fb1b8
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 4 deletions.
12 changes: 8 additions & 4 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
return plan_err!("Unsupported Value in COPY statement {}", value);
}
};
if !(&key.contains('.')) {
if !(key.contains('.') || key == "format") {
// If config does not belong to any namespace, assume it is
// a format option and apply the format prefix for backwards
// compatibility.
Expand All @@ -866,12 +866,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
FileType::from_str(&file_type).map_err(|_| {
DataFusionError::Configuration(format!("Unknown FileType {}", file_type))
})?
} else if let Some(format) = options.remove("format") {
// try to infer file format from the "format" key in options
FileType::from_str(&format)
.map_err(|e| DataFusionError::Configuration(format!("{}", e)))?
} else {
let e = || {
DataFusionError::Configuration(
"Format not explicitly set and unable to get file extension! Use STORED AS to define file format."
.to_string(),
)
"Format not explicitly set and unable to get file extension! Use STORED AS to define file format."
.to_string(),
)
};
// try to infer file format from file extension
let extension: &str = &Path::new(&statement.target)
Expand Down
12 changes: 12 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,18 @@ CopyTo: format=csv output_url=output.csv options: ()
quick_test(sql, plan);
}

/// Plans a `COPY ... TO` statement that names a format twice: once through
/// `STORED AS CSV` and once through `OPTIONS (format json)`.
///
/// The expected plan reports `format=csv`, i.e. the `STORED AS` clause takes
/// priority, while the `format json` entry is still listed among the options.
// NOTE(review): `quick_test` is defined elsewhere in this file; presumably it
// plans the SQL and compares the rendered plan with the expected text.
#[test]
fn plan_copy_stored_as_priority() {
    // Leading/trailing newlines of the raw string are stripped by `trim()`.
    let expected_plan = r#"
CopyTo: format=csv output_url=output/ options: (format json)
Projection: column1
Values: (Int64(1))
"#
    .trim();
    let query = "COPY (select * from (values (1))) to 'output/' STORED AS CSV OPTIONS (format json)";
    quick_test(query, expected_plan);
}

#[test]
fn plan_insert() {
let sql =
Expand Down
38 changes: 38 additions & 0 deletions datafusion/sqllogictest/test_files/copy.slt
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,44 @@ OPTIONS (
);


# Format Options Support with format in OPTIONS i.e. COPY { table_name | query } TO 'file_name' OPTIONS (format <format-name>, ...)

query I
COPY (select * from (values (1))) to 'test_files/scratch/copy/'
OPTIONS (format parquet);
----
1

query I
COPY (select * from (values (1))) to 'test_files/scratch/copy/'
OPTIONS (format parquet, compression 'zstd(10)');
----
1

query I
COPY (select * from (values (1))) to 'test_files/scratch/copy/'
OPTIONS (format json, compression gzip);
----
1

query I
COPY (select * from (values (1))) to 'test_files/scratch/copy/'
OPTIONS (
format csv,
has_header false,
compression xz,
datetime_format '%FT%H:%M:%S.%9f',
delimiter ';',
null_value 'NULLVAL'
);
----
1

query error DataFusion error: Invalid or Unsupported Configuration: This feature is not implemented: Unknown FileType: NOTVALIDFORMAT
COPY (select * from (values (1))) to 'test_files/scratch/copy/'
OPTIONS (format notvalidformat, compression 'zstd(5)');


# Error cases:

# Copy from table with options
Expand Down

0 comments on commit 40fb1b8

Please sign in to comment.