This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 224
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
af7ad38
commit defe7c6
Showing
16 changed files
with
282 additions
and
391 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,23 @@ | ||
use std::fs::File; | ||
use std::io::BufReader; | ||
use std::time::SystemTime; | ||
|
||
use arrow2::error::Result; | ||
use arrow2::io::parquet::read; | ||
use arrow2::{array::Array, error::Result}; | ||
|
||
/// Reads one column of one row group from the parquet file at `path` into an arrow array. | ||
/// | ||
/// `row_group` and `field` are handed to `read::get_column_iterator` to select the column | ||
/// chunk; `field` is also used to index `arrow_schema.fields`. | ||
/// | ||
/// Errors from opening the file, reading the metadata, building the schema or converting | ||
/// the column are propagated with `?`. | ||
/// NOTE(review): `arrow_schema.fields[field]` presumably panics when `field` is out of | ||
/// range — confirm against the type of `fields`. | ||
fn read_field(path: &str, row_group: usize, field: usize) -> Result<Box<dyn Array>> { | ||
// Open a file, a common operation in Rust | ||
let mut file = BufReader::new(File::open(path)?); | ||
|
||
// Read the file's metadata. This has a small IO cost because it requires seeking to the end | ||
// of the file to read its footer. | ||
let metadata = read::read_metadata(&mut file)?; | ||
|
||
// Convert the file's metadata into an arrow schema. This is CPU-only and amounts to | ||
// parsing thrift if the arrow format is available on a key, or inferring the arrow schema from | ||
// the parquet's physical, converted and logical types. | ||
let arrow_schema = read::get_schema(&metadata)?; | ||
|
||
// Create an iterator of column chunks. Each iteration | ||
// yields an iterator of compressed pages. There is almost no CPU work in iterating. | ||
let columns = read::get_column_iterator(&mut file, &metadata, row_group, field, None, vec![]); | ||
|
||
// Get the column's field. From here on `field` is shadowed: it is no longer the index | ||
// but a reference to the schema entry at that index. | ||
let field = &arrow_schema.fields[field]; | ||
|
||
// This is the actual work. In this case, pages are read and | ||
// decompressed, decoded and deserialized to arrow. | ||
// Because `columns` is an iterator, it uses a combination of IO and CPU. | ||
// Only the first element of the returned tuple (the array) is kept; the other two are discarded. | ||
let (array, _, _) = read::column_iter_to_array(columns, field, vec![])?; | ||
|
||
Ok(array) | ||
} | ||
|
||
/// Entry point: takes the parquet file path (and, in the lines shown here, a field index | ||
/// and a row group index) from the command-line arguments. | ||
/// | ||
/// NOTE(review): this listing comes from a diff view (`@@ -1,46 +1,23 @@`) and appears to | ||
/// interleave the removed lines (parsing `field`/`row_group`, calling `read_field` and | ||
/// printing the array) with the added lines (the timed `read::FileReader` loop) — confirm | ||
/// against the actual file before relying on it. | ||
fn main() -> Result<()> { | ||
use std::env; | ||
let args: Vec<String> = env::args().collect(); | ||
|
||
// Positional arguments: [1] file path, [2] field index, [3] row group index. | ||
// Indexing `args` and the `unwrap`s panic when an argument is missing or not a valid `usize`. | ||
let file_path = &args[1]; | ||
let field = args[2].parse::<usize>().unwrap(); | ||
let row_group = args[3].parse::<usize>().unwrap(); | ||
|
||
// Read the selected column and print it with its `Debug` representation. | ||
let array = read_field(file_path, row_group, field)?; | ||
println!("{:?}", array); | ||
// Open the file and hand it to `read::FileReader::try_new`, leaving every optional | ||
// argument as `None`. The second `let reader` shadows the raw `File`. | ||
let reader = File::open(file_path)?; | ||
let reader = read::FileReader::try_new(reader, None, None, None, None)?; | ||
|
||
// Time a full pass over everything the reader yields. | ||
let start = SystemTime::now(); | ||
for maybe_chunk in reader { | ||
// Propagate any read error, then check that the yielded item is not empty. | ||
let columns = maybe_chunk?; | ||
assert!(!columns.is_empty()); | ||
} | ||
// Report the time elapsed since `start`, in milliseconds. | ||
println!("took: {} ms", start.elapsed().unwrap().as_millis()); | ||
Ok(()) | ||
} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.