From 9c6b74a3aa77c9715346a967fe1e047a108e4474 Mon Sep 17 00:00:00 2001
From: Jorge Leitao
Date: Wed, 29 Jun 2022 10:16:00 -0700
Subject: [PATCH] Improved performance of writing CSV (#1128)

---
 src/io/csv/write/mod.rs  | 11 +++++------
 tests/it/io/csv/write.rs | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/io/csv/write/mod.rs b/src/io/csv/write/mod.rs
index 1f124ef6769..62f228ef14d 100644
--- a/src/io/csv/write/mod.rs
+++ b/src/io/csv/write/mod.rs
@@ -27,16 +27,16 @@ fn new_serializers<'a, A: AsRef<dyn Array>>(
 /// The vector is guaranteed to have `columns.len()` entries.
 /// Each `row` is guaranteed to have `columns.array().len()` fields.
 pub fn serialize<A: AsRef<dyn Array>>(
-    columns: &Chunk<A>,
+    chunk: &Chunk<A>,
     options: &SerializeOptions,
 ) -> Result<Vec<Vec<u8>>> {
-    let mut serializers = new_serializers(columns, options)?;
+    let mut serializers = new_serializers(chunk, options)?;
 
-    let mut rows = Vec::with_capacity(columns.len());
+    let mut rows = Vec::with_capacity(chunk.len());
     let mut row = vec![];
 
     // this is where the (expensive) transposition happens: the outer loop is on rows, the inner on columns
-    (0..columns.len()).try_for_each(|_| {
+    (0..chunk.len()).try_for_each(|_| {
         serializers
             .iter_mut()
             // `unwrap` is infalible because `array.len()` equals `Chunk::len`
@@ -49,8 +49,7 @@ pub fn serialize<A: AsRef<dyn Array>>(
             // replace last delimiter with new line
             let last_byte = row.len() - 1;
             row[last_byte] = b'\n';
-            rows.push(row.clone());
-            row.clear();
+            rows.push(std::mem::take(&mut row));
         }
         Result::Ok(())
     })?;
diff --git a/tests/it/io/csv/write.rs b/tests/it/io/csv/write.rs
index 88b502adfb1..44bec50497f 100644
--- a/tests/it/io/csv/write.rs
+++ b/tests/it/io/csv/write.rs
@@ -423,3 +423,23 @@ fn write_escaping_resize_local_buf() {
         test_generic(chunk, &format!("\"{}\"\n", payload));
     }
 }
+
+#[test]
+fn serialize_vec() -> Result<()> {
+    let columns = data();
+
+    let options = SerializeOptions::default();
+
+    let data = serialize(&columns, &options)?;
+
+    // check
+    assert_eq!(
+        vec![
+            b"a b,123.564532,3,true,,00:20:34,d\n".to_vec(),
+            b"c,,2,false,2019-04-18 10:54:47.378,06:51:20,a b\n".to_vec(),
+            b"d,-556132.25,1,,2019-04-18 02:45:55.555,23:46:03,c\n".to_vec(),
+        ],
+        data
+    );
+    Ok(())
+}