Skip to content

Commit

Permalink
Fix tests and parquet read performance (#19)
Browse files (browse the repository at this point in the history)
* Fix tests #2

* use a BufReader for sync_reader

* re-enable simd
  • Loading branch information
Igosuki authored Jan 16, 2022
1 parent e53d165 commit 2293921
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 9 deletions.
4 changes: 2 additions & 2 deletions datafusion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ path = "src/lib.rs"
[features]
default = ["crypto_expressions", "regex_expressions", "unicode_expressions"]
# FIXME: https://github.com/jorgecarleitao/arrow2/issues/580
#simd = ["arrow/simd"]
simd = []
simd = ["arrow/simd"]
#simd = []
crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
regex_expressions = ["regex"]
unicode_expressions = ["unicode-segmentation"]
Expand Down
3 changes: 2 additions & 1 deletion datafusion/src/avro_to_arrow/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,14 @@ impl ReaderBuilder {
let (mut avro_schemas, mut schema, codec, file_marker) =
read::read_metadata(&mut source)?;
if let Some(proj) = self.projection {
let indices: Vec<usize> = schema
let mut indices: Vec<usize> = schema
.fields
.iter()
.filter(|f| !proj.contains(&f.name))
.enumerate()
.map(|(i, _)| i)
.collect();
indices.sort_by(|i1, i2| i2.cmp(i1));
for i in indices {
avro_schemas.remove(i);
schema.fields.remove(i);
Expand Down
2 changes: 1 addition & 1 deletion datafusion/src/datasource/file_format/avro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ mod tests {
"double_col: Float64",
"date_string_col: Binary",
"string_col: Binary",
"timestamp_col: Timestamp(Microsecond, None)",
"timestamp_col: Timestamp(Microsecond, Some(\"00:00\"))",
],
x
);
Expand Down
6 changes: 3 additions & 3 deletions datafusion/src/datasource/object_store/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ use crate::error::Result;

use super::{ObjectReaderStream, SizedFile};

impl ReadSeek for std::fs::File {}

#[derive(Debug)]
/// Local File System as Object Store.
pub struct LocalFileSystem;
Expand Down Expand Up @@ -81,7 +79,9 @@ impl ObjectReader for LocalFileReader {
}

fn sync_reader(&self) -> Result<Box<dyn ReadSeek + Send + Sync>> {
Ok(Box::new(File::open(&self.file.path)?))
let file = File::open(&self.file.path)?;
let buf_reader = BufReader::new(file);
Ok(Box::new(buf_reader))
}

fn sync_chunk_reader(
Expand Down
3 changes: 1 addition & 2 deletions datafusion/src/datasource/object_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ use crate::error::{DataFusionError, Result};
/// Both Read and Seek
pub trait ReadSeek: Read + Seek {}

impl<R: Read + Seek> ReadSeek for std::io::BufReader<R> {}
impl<R: AsRef<[u8]>> ReadSeek for std::io::Cursor<R> {}
impl<R: Read + Seek> ReadSeek for R {}

/// Object Reader for one file in an object store.
///
Expand Down

0 comments on commit 2293921

Please sign in to comment.