Skip to content

Commit

Permalink
ARROW-9747: [Java][C++] Initial Support for 256-bit Decimals
Browse files Browse the repository at this point in the history
This provides sufficient coverage to support round trip between C++ and Java.  There are still some gaps in python.  Based on review, I will open JIRAs to track missing functionality (i.e. parquet support in C++).  Marking as draft until i can triage CI failures but early feedback is welcome.

Open questions I have:

[C++]
* Should we retain logic in decimal() factory function to adjust type on scale/precision or take an explicit argument or keep it as an alias for decimal128?

[Java]
* Naming:  Would Decimal256 be better then BigDecimal?

Closes apache#8475 from emkornfield/decimal256

Lead-authored-by: Mingyu Zhong <[email protected]>
Co-authored-by: Micah Kornfield <[email protected]>
Co-authored-by: Micah Kornfield <[email protected]>
Co-authored-by: emkornfield <[email protected]>
Co-authored-by: Ezra <[email protected]>
Signed-off-by: Micah Kornfield <[email protected]>
  • Loading branch information
4 people authored and GeorgeAp committed Jun 7, 2021
1 parent 27ac053 commit 390e1cf
Show file tree
Hide file tree
Showing 115 changed files with 3,706 additions and 1,039 deletions.
2 changes: 1 addition & 1 deletion c_glib/test/test-decimal128.rb
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test_rescale_fail
decimal = Arrow::Decimal128.new(10)
message =
"[decimal128][rescale]: Invalid: " +
"Rescaling decimal value would cause data loss"
"Rescaling Decimal128 value would cause data loss"
assert_raise(Arrow::Error::Invalid.new(message)) do
decimal.rescale(1, -1)
end
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ struct ScalarFromArraySlotImpl {
return Finish(Decimal128(a.GetValue(index_)));
}

Status Visit(const Decimal256Array& a) {
return Finish(Decimal256(a.GetValue(index_)));
}

template <typename T>
Status Visit(const BaseBinaryArray<T>& a) {
return Finish(a.GetString(index_));
Expand Down
18 changes: 16 additions & 2 deletions cpp/src/arrow/array/array_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ namespace arrow {
using internal::checked_cast;

// ----------------------------------------------------------------------
// Decimal
// Decimal128

Decimal128Array::Decimal128Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL);
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL128);
}

std::string Decimal128Array::FormatValue(int64_t i) const {
Expand All @@ -46,4 +46,18 @@ std::string Decimal128Array::FormatValue(int64_t i) const {
return value.ToString(type_.scale());
}

// ----------------------------------------------------------------------
// Decimal256

Decimal256Array::Decimal256Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL256);
}

std::string Decimal256Array::FormatValue(int64_t i) const {
const auto& type_ = checked_cast<const Decimal256Type&>(*type());
const Decimal256 value(GetValue(i));
return value.ToString(type_.scale());
}

} // namespace arrow
16 changes: 16 additions & 0 deletions cpp/src/arrow/array/array_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,20 @@ class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
// Backward compatibility
using DecimalArray = Decimal128Array;

// ----------------------------------------------------------------------
// Decimal256Array

/// Concrete Array class for 256-bit decimal data
class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal256Type;

using FixedSizeBinaryArray::FixedSizeBinaryArray;

/// \brief Construct Decimal256Array from ArrayData instance
explicit Decimal256Array(const std::shared_ptr<ArrayData>& data);

std::string FormatValue(int64_t i) const;
};

} // namespace arrow
66 changes: 43 additions & 23 deletions cpp/src/arrow/array/array_dict_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -835,13 +835,13 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, AppendArrayInvalidType) {
}
#endif

TEST(TestDecimalDictionaryBuilder, Basic) {
template <typename DecimalValue>
void TestDecimalDictionaryBuilderBasic(std::shared_ptr<DataType> decimal_type) {
// Build the dictionary Array
auto decimal_type = arrow::decimal(2, 0);
DictionaryBuilder<FixedSizeBinaryType> builder(decimal_type);

// Test data
std::vector<Decimal128> test{12, 12, 11, 12};
std::vector<DecimalValue> test{12, 12, 11, 12};
for (const auto& value : test) {
ASSERT_OK(builder.Append(value.ToBytes().data()));
}
Expand All @@ -857,40 +857,48 @@ TEST(TestDecimalDictionaryBuilder, Basic) {
ASSERT_TRUE(expected.Equals(result));
}

TEST(TestDecimalDictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal(21, 0);
TEST(TestDecimal128DictionaryBuilder, Basic) {
TestDecimalDictionaryBuilderBasic<Decimal128>(arrow::decimal128(2, 0));
}

TEST(TestDecimal256DictionaryBuilder, Basic) {
TestDecimalDictionaryBuilderBasic<Decimal256>(arrow::decimal256(76, 0));
}

void TestDecimalDictionaryBuilderDoubleTableSize(
std::shared_ptr<DataType> decimal_type, FixedSizeBinaryBuilder& decimal_builder) {
// Build the dictionary Array
DictionaryBuilder<FixedSizeBinaryType> dict_builder(decimal_type);

// Build expected data
Decimal128Builder decimal_builder(decimal_type);
Int16Builder int_builder;

// Fill with 1024 different values
for (int64_t i = 0; i < 1024; i++) {
const uint8_t bytes[] = {0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
12,
12,
static_cast<uint8_t>(i / 128),
static_cast<uint8_t>(i % 128)};
// Decimal256Builder takes 32 bytes, while Decimal128Builder takes only the first 16
// bytes.
const uint8_t bytes[32] = {0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
12,
12,
static_cast<uint8_t>(i / 128),
static_cast<uint8_t>(i % 128)};
ASSERT_OK(dict_builder.Append(bytes));
ASSERT_OK(decimal_builder.Append(bytes));
ASSERT_OK(int_builder.Append(static_cast<uint16_t>(i)));
}
// Fill with an already existing value
const uint8_t known_value[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 1};
const uint8_t known_value[32] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 1};
for (int64_t i = 0; i < 1024; i++) {
ASSERT_OK(dict_builder.Append(known_value));
ASSERT_OK(int_builder.Append(1));
Expand All @@ -911,6 +919,18 @@ TEST(TestDecimalDictionaryBuilder, DoubleTableSize) {
ASSERT_TRUE(expected.Equals(result));
}

TEST(TestDecimal128DictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal128(21, 0);
Decimal128Builder decimal_builder(decimal_type);
TestDecimalDictionaryBuilderDoubleTableSize(decimal_type, decimal_builder);
}

TEST(TestDecimal256DictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal256(21, 0);
Decimal256Builder decimal_builder(decimal_type);
TestDecimalDictionaryBuilderDoubleTableSize(decimal_type, decimal_builder);
}

TEST(TestNullDictionaryBuilder, Basic) {
// MakeBuilder
auto dict_type = dictionary(int8(), null());
Expand Down
60 changes: 52 additions & 8 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
std::make_shared<FixedSizeBinaryScalar>(
hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
std::make_shared<StringScalar>(hello),
std::make_shared<LargeStringScalar>(hello),
std::make_shared<ListScalar>(ArrayFromJSON(int8(), "[1, 2, 3]")),
Expand Down Expand Up @@ -2390,10 +2391,14 @@ TEST(TestAdaptiveUIntBuilderWithStartIntSize, TestReset) {
// ----------------------------------------------------------------------
// Test Decimal arrays

using DecimalVector = std::vector<Decimal128>;

template <typename TYPE>
class DecimalTest : public ::testing::TestWithParam<int> {
public:
using DecimalBuilder = typename TypeTraits<TYPE>::BuilderType;
using DecimalValue = typename TypeTraits<TYPE>::ScalarType::ValueType;
using DecimalArray = typename TypeTraits<TYPE>::ArrayType;
using DecimalVector = std::vector<DecimalValue>;

DecimalTest() {}

template <size_t BYTE_WIDTH = 16>
Expand All @@ -2409,8 +2414,8 @@ class DecimalTest : public ::testing::TestWithParam<int> {
template <size_t BYTE_WIDTH = 16>
void TestCreate(int32_t precision, const DecimalVector& draw,
const std::vector<uint8_t>& valid_bytes, int64_t offset) const {
auto type = std::make_shared<Decimal128Type>(precision, 4);
auto builder = std::make_shared<Decimal128Builder>(type);
auto type = std::make_shared<TYPE>(precision, 4);
auto builder = std::make_shared<DecimalBuilder>(type);

size_t null_count = 0;

Expand Down Expand Up @@ -2441,7 +2446,7 @@ class DecimalTest : public ::testing::TestWithParam<int> {
ASSERT_OK_AND_ASSIGN(expected_null_bitmap, internal::BytesToBits(valid_bytes));

int64_t expected_null_count = CountNulls(valid_bytes);
auto expected = std::make_shared<Decimal128Array>(
auto expected = std::make_shared<DecimalArray>(
type, size, expected_data, expected_null_bitmap, expected_null_count);

std::shared_ptr<Array> lhs = out->Slice(offset);
Expand All @@ -2450,7 +2455,9 @@ class DecimalTest : public ::testing::TestWithParam<int> {
}
};

TEST_P(DecimalTest, NoNulls) {
using Decimal128Test = DecimalTest<Decimal128Type>;

TEST_P(Decimal128Test, NoNulls) {
int32_t precision = GetParam();
std::vector<Decimal128> draw = {Decimal128(1), Decimal128(-2), Decimal128(2389),
Decimal128(4), Decimal128(-12348)};
Expand All @@ -2459,7 +2466,7 @@ TEST_P(DecimalTest, NoNulls) {
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(DecimalTest, WithNulls) {
TEST_P(Decimal128Test, WithNulls) {
int32_t precision = GetParam();
std::vector<Decimal128> draw = {Decimal128(1), Decimal128(2), Decimal128(-1),
Decimal128(4), Decimal128(-1), Decimal128(1),
Expand All @@ -2478,7 +2485,44 @@ TEST_P(DecimalTest, WithNulls) {
this->TestCreate(precision, draw, valid_bytes, 2);
}

INSTANTIATE_TEST_SUITE_P(DecimalTest, DecimalTest, ::testing::Range(1, 38));
INSTANTIATE_TEST_SUITE_P(Decimal128Test, Decimal128Test, ::testing::Range(1, 38));

using Decimal256Test = DecimalTest<Decimal256Type>;

TEST_P(Decimal256Test, NoNulls) {
int32_t precision = GetParam();
std::vector<Decimal256> draw = {Decimal256(1), Decimal256(-2), Decimal256(2389),
Decimal256(4), Decimal256(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal256Test, WithNulls) {
int32_t precision = GetParam();
std::vector<Decimal256> draw = {Decimal256(1), Decimal256(2), Decimal256(-1),
Decimal256(4), Decimal256(-1), Decimal256(1),
Decimal256(2)};
Decimal256 big; // (pow(2, 255) - 1) / pow(10, 38)
ASSERT_OK_AND_ASSIGN(big,
Decimal256::FromString("578960446186580977117854925043439539266."
"34992332820282019728792003956564819967"));
draw.push_back(big);

Decimal256 big_negative; // -pow(2, 255) / pow(10, 38)
ASSERT_OK_AND_ASSIGN(big_negative,
Decimal256::FromString("-578960446186580977117854925043439539266."
"34992332820282019728792003956564819968"));
draw.push_back(big_negative);

std::vector<uint8_t> valid_bytes = {true, true, false, true, false,
true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

INSTANTIATE_TEST_SUITE_P(Decimal256Test, Decimal256Test,
::testing::Values(1, 2, 5, 10, 38, 39, 40, 75, 76));

// ----------------------------------------------------------------------
// Test rechunking
Expand Down
35 changes: 35 additions & 0 deletions cpp/src/arrow/array/builder_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,39 @@ Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
return Status::OK();
}

// ----------------------------------------------------------------------
// Decimal256Builder

Decimal256Builder::Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: FixedSizeBinaryBuilder(type, pool),
decimal_type_(internal::checked_pointer_cast<Decimal256Type>(type)) {}

Status Decimal256Builder::Append(const Decimal256& value) {
RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
UnsafeAppend(value);
return Status::OK();
}

void Decimal256Builder::UnsafeAppend(const Decimal256& value) {
value.ToBytes(GetMutableValue(length()));
byte_builder_.UnsafeAdvance(32);
UnsafeAppendToBitmap(true);
}

void Decimal256Builder::UnsafeAppend(util::string_view value) {
FixedSizeBinaryBuilder::UnsafeAppend(value);
}

Status Decimal256Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
std::shared_ptr<Buffer> data;
RETURN_NOT_OK(byte_builder_.Finish(&data));
std::shared_ptr<Buffer> null_bitmap;
RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));

*out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
capacity_ = length_ = null_count_ = 0;
return Status::OK();
}

} // namespace arrow
29 changes: 29 additions & 0 deletions cpp/src/arrow/array/builder_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,35 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
std::shared_ptr<Decimal128Type> decimal_type_;
};

class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal256Type;

explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());

using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
using FixedSizeBinaryBuilder::Reset;

Status Append(const Decimal256& val);
void UnsafeAppend(const Decimal256& val);
void UnsafeAppend(util::string_view val);

Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond

Status Finish(std::shared_ptr<Decimal256Array>* out) { return FinishTyped(out); }

std::shared_ptr<DataType> type() const override { return decimal_type_; }

protected:
std::shared_ptr<Decimal256Type> decimal_type_;
};

using DecimalBuilder = Decimal128Builder;

} // namespace arrow
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class DictionaryMemoTable::DictionaryMemoTableImpl {

template <typename T>
enable_if_no_memoize<T, Status> Visit(const T&) {
return Status::NotImplemented("Initialization of ", value_type_,
return Status::NotImplemented("Initialization of ", value_type_->ToString(),
" memo table is not implemented");
}

Expand Down
10 changes: 9 additions & 1 deletion cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,20 @@ class DictionaryBuilderBase : public ArrayBuilder {

/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal<T1, Status> Append(const Decimal128& value) {
enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
uint8_t data[16];
value.ToBytes(data);
return Append(data, 16);
}

/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
uint8_t data[32];
value.ToBytes(data);
return Append(data, 32);
}

/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
Expand Down
Loading

0 comments on commit 390e1cf

Please sign in to comment.