From 55df561c6f78272c95e0ca979f3b5654c8ba4419 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 30 Apr 2022 14:52:09 +0200 Subject: [PATCH] prevent SO in large parquet files --- src/io/parquet/write/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index b9b8f1b2f66..7e6fcc16ba0 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -95,8 +95,11 @@ pub fn array_to_pages( encoding: Encoding, ) -> Result>> { // maximum page size is 2^31 e.g. i32::MAX - // we split at 2^30 to err on the safe side - if estimated_bytes_size(array) >= 2u32.pow(30) as usize { + // we split at 2^31 - 2^25 to err on the safe side + // we also check for an array.len > 3 to prevent infinite recursion + // still have to figure out how to deal with values that are i32::MAX size, such as very large + // strings or a list column with many elements + if (estimated_bytes_size(array)) >= (2u32.pow(31) - 2u32.pow(25)) as usize && array.len() > 3 { let split_at = array.len() / 2; let left = array.slice(0, split_at); let right = array.slice(split_at, array.len() - split_at);