forked from jorgecarleitao/arrow2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactored JSON IO (better support for JSON and NDJSON) (jorgecarleit…
- Loading branch information
1 parent
d9c3854
commit 3c714aa
Showing
27 changed files
with
1,174 additions
and
1,206 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,32 @@ | ||
use std::fs::File; | ||
use std::sync::Arc; | ||
|
||
use arrow2::{ | ||
array::{Array, Int32Array}, | ||
chunk::Chunk, | ||
error::Result, | ||
error::ArrowError, | ||
io::json::write, | ||
}; | ||
|
||
fn write_batches(path: &str, names: Vec<String>, batches: &[Chunk<Arc<dyn Array>>]) -> Result<()> { | ||
fn write_array(path: &str, array: Box<dyn Array>) -> Result<(), ArrowError> { | ||
let mut writer = File::create(path)?; | ||
let format = write::Format::Json; | ||
|
||
let batches = batches.iter().cloned().map(Ok); | ||
let arrays = vec![Ok(array)].into_iter(); | ||
|
||
// Advancing this iterator serializes the next batch to its internal buffer (i.e. CPU-bounded) | ||
let blocks = write::Serializer::new(batches, names, vec![], format); | ||
// Advancing this iterator serializes the next array to its internal buffer (i.e. CPU-bound) | ||
let blocks = write::Serializer::new(arrays, vec![]); | ||
|
||
// the operation of writing is IO-bound. | ||
write::write(&mut writer, format, blocks)?; | ||
write::write(&mut writer, blocks)?; | ||
|
||
Ok(()) | ||
} | ||
|
||
fn main() -> Result<()> { | ||
let array = Arc::new(Int32Array::from(&[ | ||
Some(0), | ||
None, | ||
Some(2), | ||
Some(3), | ||
Some(4), | ||
Some(5), | ||
Some(6), | ||
])) as Arc<dyn Array>; | ||
|
||
write_batches( | ||
"example.json", | ||
vec!["c1".to_string()], | ||
&[Chunk::new(vec![array.clone()]), Chunk::new(vec![array])], | ||
) | ||
fn main() -> Result<(), ArrowError> { | ||
use std::env; | ||
let args: Vec<String> = env::args().collect(); | ||
|
||
let file_path = &args[1]; | ||
|
||
let array = Int32Array::from(&[Some(0), None, Some(2), Some(3), Some(4), Some(5), Some(6)]); | ||
|
||
write_array(file_path, Box::new(array)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,40 @@ | ||
use std::fs::File; | ||
use std::io::BufReader; | ||
use std::io::{BufReader, Seek}; | ||
use std::sync::Arc; | ||
|
||
use arrow2::array::Array; | ||
use arrow2::chunk::Chunk; | ||
use arrow2::error::Result; | ||
use arrow2::io::json::read; | ||
use arrow2::io::ndjson::read; | ||
use arrow2::io::ndjson::read::FallibleStreamingIterator; | ||
|
||
fn read_path(path: &str, projection: Option<Vec<&str>>) -> Result<Chunk<Arc<dyn Array>>> { | ||
// Example of reading an NDJSON file. | ||
fn read_path(path: &str) -> Result<Vec<Arc<dyn Array>>> { | ||
let batch_size = 1024; // number of rows per array | ||
let mut reader = BufReader::new(File::open(path)?); | ||
|
||
let fields = read::infer_and_reset(&mut reader, None)?; | ||
|
||
let fields = if let Some(projection) = projection { | ||
fields | ||
.into_iter() | ||
.filter(|field| projection.contains(&field.name.as_ref())) | ||
.collect() | ||
} else { | ||
fields | ||
}; | ||
|
||
// at most 1024 rows. This container can be re-used across batches. | ||
let mut rows = vec![String::default(); 1024]; | ||
|
||
// Reads up to 1024 rows. | ||
// this is IO-intensive and performs minimal CPU work. In particular, | ||
// no deserialization is performed. | ||
let read = read::read_rows(&mut reader, &mut rows)?; | ||
let rows = &rows[..read]; | ||
|
||
// deserialize `rows` into `Chunk`. This is CPU-intensive, has no IO, | ||
// and can be performed on a different thread pool via a channel. | ||
read::deserialize(rows, &fields) | ||
let data_type = read::infer(&mut reader, None)?; | ||
reader.rewind()?; | ||
|
||
let mut reader = read::FileReader::new(reader, vec!["".to_string(); batch_size], None); | ||
|
||
let mut arrays = vec![]; | ||
// `next` is IO-bound | ||
while let Some(rows) = reader.next()? { | ||
// `deserialize` is CPU-bound | ||
let array = read::deserialize(rows, data_type.clone())?; | ||
arrays.push(array); | ||
} | ||
|
||
Ok(arrays) | ||
} | ||
|
||
fn main() -> Result<()> { | ||
// Example of reading an NDJSON file from a path | ||
use std::env; | ||
let args: Vec<String> = env::args().collect(); | ||
|
||
let file_path = &args[1]; | ||
|
||
let batch = read_path(file_path, None)?; | ||
println!("{:#?}", batch); | ||
let arrays = read_path(file_path)?; | ||
println!("{:#?}", arrays); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
use std::fs::File; | ||
|
||
use arrow2::array::{Array, Int32Array}; | ||
use arrow2::error::Result; | ||
use arrow2::io::ndjson::write; | ||
|
||
fn write_path(path: &str, array: Box<dyn Array>) -> Result<()> { | ||
let writer = File::create(path)?; | ||
|
||
let serializer = write::Serializer::new(vec![Ok(array)].into_iter(), vec![]); | ||
|
||
let mut writer = write::FileWriter::new(writer, serializer); | ||
writer.by_ref().collect::<Result<()>>() | ||
} | ||
|
||
fn main() -> Result<()> { | ||
// Example of writing an NDJSON file to a path | ||
use std::env; | ||
let args: Vec<String> = env::args().collect(); | ||
|
||
let file_path = &args[1]; | ||
|
||
let array = Box::new(Int32Array::from(&[ | ||
Some(0), | ||
None, | ||
Some(2), | ||
Some(3), | ||
Some(4), | ||
Some(5), | ||
Some(6), | ||
])); | ||
|
||
write_path(file_path, array)?; | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,14 @@ | ||
# Write JSON | ||
|
||
When compiled with feature `io_json`, you can use this crate to write JSON files. | ||
The following example writes a batch as a JSON file: | ||
When compiled with feature `io_json`, you can use this crate to write JSON. | ||
The following example writes an array to JSON: | ||
|
||
```rust | ||
{{#include ../../../examples/json_write.rs}} | ||
``` | ||
|
||
Likewise, you can also use it to write to NDJSON: | ||
|
||
```rust | ||
{{#include ../../../examples/ndjson_write.rs}} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.