-
Notifications
You must be signed in to change notification settings - Fork 763
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ec16ca1
commit ed464ae
Showing
10 changed files
with
191 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,3 +58,7 @@ harness = false | |
[[bench]] | ||
name = "data_type" | ||
harness = false | ||
|
||
[[bench]] | ||
name = "csv" | ||
harness = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
extern crate core; | ||
|
||
use common_datavalues::serializations::formats::csv; | ||
use common_datavalues::ColumnRef; | ||
use common_datavalues::Series; | ||
use common_datavalues::SeriesFrom; | ||
use criterion::criterion_group; | ||
use criterion::criterion_main; | ||
use criterion::Criterion; | ||
use rand::rngs::StdRng; | ||
use rand::Rng; | ||
use rand::SeedableRng; | ||
|
||
fn add_benchmark(c: &mut Criterion) { | ||
(10..=21).step_by(2).for_each(|log2_size| { | ||
let size = 2usize.pow(log2_size); | ||
let col = create_primitive_array(size); | ||
c.bench_function( | ||
&format!("i32 2^{} not null, write_by_row", log2_size), | ||
|b| { | ||
b.iter(|| csv::write_by_row(&col)); | ||
}, | ||
); | ||
c.bench_function( | ||
&format!("i32 2^{} not null, write_iterator", log2_size), | ||
|b| { | ||
b.iter(|| csv::write_iterator(&col)); | ||
}, | ||
); | ||
}); | ||
} | ||
|
||
criterion_group!(benches, add_benchmark); | ||
criterion_main!(benches); | ||
|
||
pub fn create_primitive_array(size: usize) -> ColumnRef { | ||
let mut rng = seedable_rng(); | ||
|
||
let v = (0..size).map(|_| rng.gen()).collect::<Vec<i32>>(); | ||
Series::from_data(v) | ||
} | ||
|
||
pub fn seedable_rng() -> StdRng { | ||
StdRng::seed_from_u64(42) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
use common_io::prelude::FormatSettings; | ||
|
||
use crate::ColumnRef; | ||
use crate::DataType; | ||
use crate::TypeSerializer; | ||
|
||
#[allow(dead_code)] | ||
pub fn write_vec(col: &ColumnRef) -> Vec<u8> { | ||
let mut buf = Vec::with_capacity(1000 * 1000); | ||
|
||
let s = col.data_type().create_serializer(); | ||
let v = s | ||
.serialize_column(&col, &FormatSettings::default()) | ||
.unwrap(); | ||
for field in v { | ||
buf.extend_from_slice(&field.as_bytes()); | ||
} | ||
buf | ||
} | ||
|
||
pub fn write_by_row(col: &ColumnRef) -> Vec<u8> { | ||
let mut buf = Vec::with_capacity(1000 * 1000); | ||
let rows = col.len(); | ||
let s = col.data_type().create_serializer(); | ||
let f = &FormatSettings::default(); | ||
for row in 0..rows { | ||
s.write_csv_field(col, row, &mut buf, f).unwrap(); | ||
} | ||
buf | ||
} | ||
|
||
pub fn write_iterator(col: &ColumnRef) -> Vec<u8> { | ||
let mut buf = Vec::with_capacity(1000 * 1000); | ||
|
||
let s = col.data_type().create_serializer(); | ||
let mut stream = s.serialize_csv(&col, &FormatSettings::default()).unwrap(); | ||
while let Some(field) = stream.next() { | ||
buf.extend_from_slice(field); | ||
} | ||
buf | ||
} | ||
|
||
/// Checks that `write_iterator` and `write_by_row` emit the same bytes for a
/// small `u8` column.
///
/// The test returns `()`: it never propagates an error, and this module
/// imports no single-parameter `Result` alias that `Result<()>` could name.
#[test]
fn test_2() {
    use crate::Series;
    use crate::SeriesFrom;

    let col = Series::from_data(vec![12u8, 23u8, 34u8]);
    // ASCII bytes of "12", "23" and "34" written back to back.
    let exp = [49, 50, 50, 51, 51, 52];
    assert_eq!(write_iterator(&col), exp);
    assert_eq!(write_by_row(&col), exp);
}
|
||
/// Smoke test that prints what the `serialize_csv` stream yields for a plain
/// `u8` column and for a column containing a `None`.
///
/// It makes no assertions; run with `--nocapture` to see the output. Errors
/// are unwrapped instead of propagated because this module imports no
/// single-parameter `Result` alias that `Result<()>` could name.
#[test]
fn test_s() {
    use crate::Series;
    use crate::SeriesFrom;

    let col = Series::from_data(vec![12u8, 23u8, 34u8]);
    println!("{:?}", col);
    let s = col.data_type().create_serializer();
    let mut stream = s.serialize_csv(&col, &FormatSettings::default()).unwrap();
    // Pull one item more than there are rows to show what the exhausted
    // stream returns.
    for _ in 0..4 {
        println!("{:?}", stream.next());
    }

    let col = Series::from_data(vec![Some(12), None, Some(34)]);
    println!("{:?}", col);
    let s = col.data_type().create_serializer();
    let mut stream = s.serialize_csv(&col, &FormatSettings::default()).unwrap();
    for _ in 0..4 {
        println!("{:?}", stream.next());
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,20 @@ | ||
pub mod csv; | ||
pub mod iterators; | ||
|
||
#[inline] | ||
pub fn lexical_to_bytes_mut_no_clear<N: lexical_core::ToLexical>(n: N, buf: &mut Vec<u8>) { | ||
buf.reserve(N::FORMATTED_SIZE_DECIMAL); | ||
let len0 = buf.len(); | ||
unsafe { | ||
// JUSTIFICATION | ||
// Benefit | ||
// Allows using the faster serializer lexical core and convert to string | ||
// Soundness | ||
// Length of buf is set as written length afterwards. lexical_core | ||
// creates a valid string, so doesn't need to be checked. | ||
let slice = | ||
std::slice::from_raw_parts_mut(buf.as_mut_ptr().add(len0), buf.capacity() - len0); | ||
let len = lexical_core::write(n, slice).len(); | ||
buf.set_len(len0 + len); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters