Skip to content

Commit

Permalink
add test for FLBA type
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Dec 9, 2022
1 parent a3e1cea commit c8a1375
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 4 deletions.
8 changes: 5 additions & 3 deletions cpp/src/parquet/page_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,12 @@ class TypedColumnIndexImpl : public TypedColumnIndex<DType> {
const format::ColumnIndex& column_index)
: column_index_(column_index) {
// Make sure the number of pages is valid and it does not overflow to int32_t.
if (column_index_.null_pages.size() != column_index_.min_values.size() ||
if (ARROW_PREDICT_FALSE(column_index_.null_pages.size() >=
static_cast<size_t>(std::numeric_limits<int32_t>::max())) ||
column_index_.null_pages.size() != column_index_.min_values.size() ||
column_index_.min_values.size() != column_index_.max_values.size() ||
ARROW_PREDICT_FALSE(column_index_.null_pages.size() >=
static_cast<size_t>(std::numeric_limits<int32_t>::max()))) {
(column_index_.__isset.null_counts &&
column_index_.null_counts.size() != column_index_.null_pages.size())) {
throw ParquetException("Invalid column index");
}

Expand Down
32 changes: 32 additions & 0 deletions cpp/src/parquet/page_index_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ void TestReadTypedColumnIndex(const std::string& file_name, int column_id,
} else if constexpr (std::is_same_v<T, float>) {
ASSERT_FLOAT_EQ(min_values.at(i), typed_column_index->min_values().at(page_id));
ASSERT_FLOAT_EQ(max_values.at(i), typed_column_index->max_values().at(page_id));
} else if constexpr (std::is_same_v<T, FLBA>) {
auto len = descr->type_length();
ASSERT_EQ(0, ::memcmp(min_values.at(i).ptr,
typed_column_index->min_values().at(page_id).ptr, len));
ASSERT_EQ(0, ::memcmp(max_values.at(i).ptr,
typed_column_index->max_values().at(page_id).ptr, len));
} else {
ASSERT_EQ(min_values.at(i), typed_column_index->min_values().at(page_id));
ASSERT_EQ(max_values.at(i), typed_column_index->max_values().at(page_id));
Expand Down Expand Up @@ -208,6 +214,32 @@ TEST(PageIndex, ReadBoolColumnIndex) {
null_pages, min_values, max_values, has_null_counts, null_counts);
}

namespace {

// Test helper: reinterprets a character buffer as raw bytes and builds an FLBA
// from that pointer. Only the pointer is handed to FLBA here; presumably the
// bytes are neither copied nor owned, so the caller must keep the buffer alive
// for as long as the result is used -- TODO confirm against FLBA's definition.
FLBA toFLBA(const char* ptr) {
  const auto* bytes = reinterpret_cast<const uint8_t*>(ptr);
  return FLBA{bytes};
}

}  // namespace

// Reads the column index of column 0 in "fixed_length_byte_array.parquet" and
// compares it with the hard-coded expectations below.
TEST(PageIndex, FixedLengthByteArrayColumnIndex) {
  const int column_id = 0;
  const size_t num_pages = 10;
  const BoundaryOrder::type boundary_order = BoundaryOrder::Descending;

  // Expectations are listed for three pages only; each vector below has one
  // entry per listed page (presumably matched up by position in the helper).
  const std::vector<size_t> page_indices{0, 4, 8};
  const std::vector<bool> null_pages(3, false);
  const bool has_null_counts = true;
  const std::vector<int64_t> null_counts{9, 13, 9};

  // Each literal spells out four bytes. String literals have static storage
  // duration, so the pointers wrapped by toFLBA stay valid for the whole test.
  const std::vector<FLBA> min_values{
      toFLBA("\x00\x00\x03\x85"),
      toFLBA("\x00\x00\x01\xF5"),
      toFLBA("\x00\x00\x00\x65"),
  };
  const std::vector<FLBA> max_values{
      toFLBA("\x00\x00\x03\xE8"),
      toFLBA("\x00\x00\x02\x58"),
      toFLBA("\x00\x00\x00\xC8"),
  };

  TestReadTypedColumnIndex<FLBAType>("fixed_length_byte_array.parquet", column_id,
                                     num_pages, boundary_order, page_indices,
                                     null_pages, min_values, max_values,
                                     has_null_counts, null_counts);
}

TEST(PageIndex, ReadColumnIndexWithNullPage) {
const int column_id = 0;
const size_t num_pages = 2;
Expand Down

0 comments on commit c8a1375

Please sign in to comment.