diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs
index be278eda5360..332b893aa4bc 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -1560,4 +1560,21 @@ mod tests {
             panic!("Statistics::Int64 missing")
         }
     }
+
+    #[test]
+    fn statistics_null_counts_only_nulls() {
+        // check that null-count statistics for "only NULL"-columns are correct
+        let values = Arc::new(UInt64Array::from(vec![None, None]));
+        let file = one_column_roundtrip("null_counts", values, true);
+
+        // open the file returned by the roundtrip helper and inspect its metadata
+        let reader = SerializedFileReader::new(file).unwrap();
+        let metadata = reader.metadata();
+        assert_eq!(metadata.num_row_groups(), 1);
+        let row_group = metadata.row_group(0);
+        assert_eq!(row_group.num_columns(), 1);
+        let column = row_group.column(0);
+        let stats = column.statistics().unwrap();
+        assert_eq!(stats.null_count(), 2); // both input values are `None`
+    }
 }
diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index 57ccda3a3e61..910a9ed5dcaf 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -607,9 +607,11 @@ impl ColumnWriterImpl {
         let max_def_level = self.descr.max_def_level();
         let max_rep_level = self.descr.max_rep_level();
 
+        // always add the page NULL count to the column NULL count, whether or not page stats are calculated
+        self.num_column_nulls += self.num_page_nulls;
+
         let page_statistics = if calculate_page_stat {
             self.update_column_min_max();
-            self.num_column_nulls += self.num_page_nulls;
             Some(self.make_page_statistics())
         } else {
             None