Skip to content

Commit

Permalink
add back GetNonNullPageIndices
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Dec 3, 2022
1 parent de0b5dc commit 1783ef6
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 18 deletions.
24 changes: 14 additions & 10 deletions cpp/src/parquet/page_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,12 @@ class TypedColumnIndexImpl : public TypedColumnIndex<DType> {
auto plain_decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, &descr);
T value;
for (size_t i = 0; i < column_index_.null_pages.size(); ++i) {
if (column_index_.null_pages[i]) {
min_values_.push_back(std::nullopt);
max_values_.push_back(std::nullopt);
} else {
if (!column_index_.null_pages[i]) {
non_null_page_indices_.emplace_back(static_cast<int32_t>(i));
Decode<DType>(plain_decoder, column_index_.min_values[i], &value);
min_values_.push_back(value);
min_values_.emplace_back(value);
Decode<DType>(plain_decoder, column_index_.max_values[i], &value);
max_values_.push_back(value);
max_values_.emplace_back(value);
}
}
}
Expand All @@ -89,16 +87,22 @@ class TypedColumnIndexImpl : public TypedColumnIndex<DType> {
return column_index_.null_counts;
}

const std::vector<std::optional<T>>& min_values() const override { return min_values_; }
const std::vector<T>& min_values() const override { return min_values_; }

const std::vector<std::optional<T>>& max_values() const override { return max_values_; }
const std::vector<T>& max_values() const override { return max_values_; }

/// Returns the page indices recorded for pages that are not null pages.
/// Entries are appended in page order alongside the decoded min/max values,
/// so position k here names the page that min_values_[k] and max_values_[k]
/// were decoded from.
/// NOTE(review): the return type is a by-value vector, so every call copies
/// non_null_page_indices_.
const std::vector<int32_t> GetNonNullPageIndices() const override {
return non_null_page_indices_;
}

private:
/// Wrapped thrift column index.
const format::ColumnIndex column_index_;
/// Decoded typed min/max values. Null pages are skipped, so only non-null pages have entries.
std::vector<std::optional<T>> min_values_;
std::vector<std::optional<T>> max_values_;
std::vector<T> min_values_;
std::vector<T> max_values_;
/// A list of page indices for not-null pages.
std::vector<int32_t> non_null_page_indices_;
};

class OffsetIndexImpl : public OffsetIndex {
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/parquet/page_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@

#pragma once

#include <optional>
#include <set>
#include <vector>

#include "parquet/exception.h"
#include "parquet/platform.h"
#include "parquet/schema.h"

#include <vector>

namespace parquet {

/// \brief BoundaryOrder is a proxy around format::BoundaryOrder.
Expand Down Expand Up @@ -72,11 +70,18 @@ class PARQUET_EXPORT TypedColumnIndex : public ColumnIndex {
public:
using T = typename DType::c_type;

/// \brief Returns a list of lower bound for the values of every page.
virtual const std::vector<std::optional<T>>& min_values() const = 0;
/// \brief Returns a list of lower bounds for the values of every non-null page.
/// Excluding null pages helps binary search if the values are ordered.
virtual const std::vector<T>& min_values() const = 0;

/// \brief Returns a list of upper bounds for the values of every non-null page.
/// Excluding null pages helps binary search if the values are ordered.
virtual const std::vector<T>& max_values() const = 0;

/// \brief Returns a list of upper bound for the values of every page.
virtual const std::vector<std::optional<T>>& max_values() const = 0;
/// \brief Returns a list of page indices for non-null pages. The i-th entry is
/// the original page index of the i-th value returned from min_values()
/// and max_values() above.
virtual const std::vector<int32_t> GetNonNullPageIndices() const = 0;
};

using BoolColumnIndex = TypedColumnIndex<BooleanType>;
Expand Down

0 comments on commit 1783ef6

Please sign in to comment.