From d1e981a51e1b330de1b7bbc20ee5d9ed7737e547 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 30 Aug 2021 20:21:58 -0700 Subject: [PATCH] ARROW-13628: [Format][C++][Java] Add MONTH_DAY_NANO interval type Trying to formalize [mailing list discussion](https://lists.apache.org/thread.html/rd919c4ed8ad2f2827a2d4f665d8da99e545ba92ef992b2e557831751%40%3Cdev.arrow.apache.org%3E) Closes #10177 from emkornfield/interval Lead-authored-by: Micah Kornfield Co-authored-by: emkornfield Co-authored-by: emkornfield Signed-off-by: Micah Kornfield --- cpp/src/arrow/array/array_base.cc | 1 + cpp/src/arrow/array/array_primitive.cc | 34 ++ cpp/src/arrow/array/array_primitive.h | 31 ++ cpp/src/arrow/array/array_test.cc | 117 +++-- cpp/src/arrow/array/builder_time.h | 13 + cpp/src/arrow/array/diff.cc | 10 + cpp/src/arrow/array/diff_test.cc | 8 + cpp/src/arrow/array/util.cc | 23 + cpp/src/arrow/builder.cc | 1 + .../compute/kernels/scalar_set_lookup.cc | 31 +- .../compute/kernels/scalar_set_lookup_test.cc | 18 + cpp/src/arrow/ipc/json_simple.cc | 35 ++ cpp/src/arrow/ipc/json_simple_test.cc | 6 + cpp/src/arrow/ipc/metadata_internal.cc | 11 + cpp/src/arrow/ipc/test_common.cc | 10 +- cpp/src/arrow/json/test_common.h | 2 + cpp/src/arrow/pretty_print.cc | 9 + cpp/src/arrow/pretty_print_test.cc | 31 ++ cpp/src/arrow/scalar.cc | 12 +- cpp/src/arrow/scalar.h | 5 + cpp/src/arrow/testing/gtest_util.cc | 3 +- cpp/src/arrow/testing/gtest_util.h | 1 + cpp/src/arrow/testing/json_internal.cc | 49 ++ cpp/src/arrow/testing/random.cc | 40 +- cpp/src/arrow/testing/random.h | 6 + cpp/src/arrow/testing/random_test.cc | 5 +- cpp/src/arrow/type.cc | 7 + cpp/src/arrow/type.h | 39 +- cpp/src/arrow/type_fwd.h | 11 + cpp/src/arrow/type_test.cc | 17 + cpp/src/arrow/type_traits.h | 18 + cpp/src/arrow/visitor.cc | 3 + cpp/src/arrow/visitor.h | 3 + cpp/src/arrow/visitor_inline.h | 1 + cpp/src/generated/File_generated.h | 3 - cpp/src/generated/Message_generated.h | 14 +- cpp/src/generated/Schema_generated.h | 140 ++---- cpp/src/generated/SparseTensor_generated.h | 74 ++- cpp/src/generated/Tensor_generated.h | 6 - cpp/src/generated/feather_generated.h | 21 - dev/archery/archery/integration/datagen.py | 48 ++ format/Schema.fbs | 18 +- .../apache/arrow/flatbuf/IntervalUnit.java | 3 +- .../org/apache/arrow/flatbuf/Timestamp.java | 37 +- .../main/codegen/data/ValueVectorTypes.tdd | 10 + .../templates/AbstractFieldReader.java | 2 +- .../codegen/templates/DenseUnionReader.java | 2 +- .../codegen/templates/HolderReaderImpl.java | 3 + .../main/codegen/templates/NullReader.java | 2 +- .../main/codegen/templates/UnionReader.java | 6 +- .../vector/IntervalMonthDayNanoVector.java | 442 ++++++++++++++++++ .../apache/arrow/vector/PeriodDuration.java | 64 +++ .../org/apache/arrow/vector/TypeLayout.java | 2 + .../arrow/vector/ipc/JsonFileReader.java | 23 + .../arrow/vector/ipc/JsonFileWriter.java | 8 + .../arrow/vector/types/IntervalUnit.java | 4 +- .../org/apache/arrow/vector/types/Types.java | 18 + .../validate/ValidateVectorTypeVisitor.java | 4 + .../TestIntervalMonthDayNanoVector.java | 99 ++++ .../arrow/vector/TestPeriodDuration.java | 46 ++ 60 files changed, 1466 insertions(+), 244 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index 11770d0090ce4..dd3cec1d7e9e4 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -85,6 +85,7 @@ struct ScalarFromArraySlotImpl { Status Visit(const FixedSizeBinaryArray& a) { return Finish(a.GetString(index_)); } Status Visit(const DayTimeIntervalArray& a) { return Finish(a.Value(index_)); } + Status Visit(const MonthDayNanoIntervalArray& a) { return Finish(a.Value(index_)); } template Status Visit(const BaseListArray& a) { diff --git a/cpp/src/arrow/array/array_primitive.cc b/cpp/src/arrow/array/array_primitive.cc index a1aff933af414..5312c3ece1415 100644 --- a/cpp/src/arrow/array/array_primitive.cc +++ b/cpp/src/arrow/array/array_primitive.cc @@ -90,10 +90,44 @@ DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr& type int64_t null_count, int64_t offset) : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {} +DayTimeIntervalArray::DayTimeIntervalArray(int64_t length, + const std::shared_ptr& data, + const std::shared_ptr& null_bitmap, + int64_t null_count, int64_t offset) + : PrimitiveArray(day_time_interval(), length, data, null_bitmap, null_count, offset) { +} + DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const { DCHECK(i < length()); return *reinterpret_cast( raw_values_ + (i + data_->offset) * byte_width()); } +// ---------------------------------------------------------------------- +// Month, day and Nanos interval + +MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( + const std::shared_ptr& data) { + SetData(data); +} + +MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( + const std::shared_ptr& type, int64_t length, + const std::shared_ptr& data, const std::shared_ptr& null_bitmap, + int64_t null_count, int64_t offset) + : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {} + +MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( + int64_t length, const std::shared_ptr& data, + const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) + : PrimitiveArray(month_day_nano_interval(), length, data, null_bitmap, null_count, + offset) {} + +MonthDayNanoIntervalType::MonthDayNanos MonthDayNanoIntervalArray::GetValue( + int64_t i) const { + DCHECK(i < length()); + return *reinterpret_cast( + raw_values_ + (i + data_->offset) * byte_width()); +} + } // namespace arrow diff --git a/cpp/src/arrow/array/array_primitive.h b/cpp/src/arrow/array/array_primitive.h index b601eb770c32f..8f923f3df5e28 100644 --- a/cpp/src/arrow/array/array_primitive.h +++ b/cpp/src/arrow/array/array_primitive.h @@ -121,6 +121,10 @@ class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray { const std::shared_ptr& null_bitmap = NULLPTR, int64_t null_count = kUnknownNullCount, int64_t offset = 0); + DayTimeIntervalArray(int64_t length, const std::shared_ptr& data, + const std::shared_ptr& null_bitmap = NULLPTR, + int64_t null_count = kUnknownNullCount, int64_t offset = 0); + TypeClass::DayMilliseconds GetValue(int64_t i) const; TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); } @@ -132,4 +136,31 @@ class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray { const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); } }; +/// \brief Array of Month, Day and nanosecond values. +class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray { + public: + using TypeClass = MonthDayNanoIntervalType; + + explicit MonthDayNanoIntervalArray(const std::shared_ptr& data); + + MonthDayNanoIntervalArray(const std::shared_ptr& type, int64_t length, + const std::shared_ptr& data, + const std::shared_ptr& null_bitmap = NULLPTR, + int64_t null_count = kUnknownNullCount, int64_t offset = 0); + + MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr& data, + const std::shared_ptr& null_bitmap = NULLPTR, + int64_t null_count = kUnknownNullCount, int64_t offset = 0); + + TypeClass::MonthDayNanos GetValue(int64_t i) const; + TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); } + + // For compatibility with Take kernel. + TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); } + + int32_t byte_width() const { return sizeof(TypeClass::MonthDayNanos); } + + const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); } +}; + } // namespace arrow diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 480c5f1c64953..cc45a36940003 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -37,6 +37,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/array/builder_decimal.h" #include "arrow/array/builder_dict.h" +#include "arrow/array/builder_time.h" #include "arrow/array/data.h" #include "arrow/array/util.h" #include "arrow/buffer.h" @@ -491,6 +492,7 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) static ScalarVector GetScalars() { auto hello = Buffer::FromString("hello"); DayTimeIntervalType::DayMilliseconds daytime{1, 100}; + MonthDayNanoIntervalType::MonthDayNanos month_day_nanos{5, 4, 100}; FieldVector union_fields{field("string", utf8()), field("number", int32()), field("other_number", int32())}; @@ -513,6 +515,7 @@ static ScalarVector GetScalars() { std::make_shared(1111, timestamp(TimeUnit::MILLI)), std::make_shared(1), std::make_shared(daytime), + std::make_shared(month_day_nanos), std::make_shared(60, duration(TimeUnit::SECOND)), std::make_shared(hello), std::make_shared(hello), @@ -811,9 +814,10 @@ TEST_F(TestBuilder, TestResizeDownsize) { template class TestPrimitiveBuilder : public TestBuilder { public: + typedef Attrs TestAttrs; typedef typename Attrs::ArrayType ArrayType; typedef typename Attrs::BuilderType BuilderType; - typedef typename Attrs::T T; + typedef typename Attrs::T CType; typedef typename Attrs::Type Type; virtual void SetUp() { @@ -867,7 +871,7 @@ class TestPrimitiveBuilder : public TestBuilder { ASSERT_TRUE(result->Equals(*expected)); } - void FlipValue(T* ptr) { + void FlipValue(CType* ptr) { auto byteptr = reinterpret_cast(ptr); *byteptr = static_cast(~*byteptr); } @@ -876,7 +880,7 @@ class TestPrimitiveBuilder : public TestBuilder { std::unique_ptr builder_; std::unique_ptr builder_nn_; - std::vector draws_; + std::vector draws_; std::vector valid_bytes_; }; @@ -905,16 +909,20 @@ struct UniformIntSampleType { \ static std::shared_ptr type() { return std::make_shared(); } -#define PINT_DECL(CapType, c_type) \ - struct P##CapType { \ - PTYPE_DECL(CapType, c_type) \ - static void draw(int64_t N, std::vector* draws) { \ - using sample_type = typename UniformIntSampleType::type; \ - const T lower = std::numeric_limits::min(); \ - const T upper = std::numeric_limits::max(); \ - randint(N, static_cast(lower), static_cast(upper), \ - draws); \ - } \ +#define PINT_DECL(CapType, c_type) \ + struct P##CapType { \ + PTYPE_DECL(CapType, c_type) \ + static void draw(int64_t N, std::vector* draws) { \ + using sample_type = typename UniformIntSampleType::type; \ + const T lower = std::numeric_limits::min(); \ + const T upper = std::numeric_limits::max(); \ + randint(N, static_cast(lower), static_cast(upper), \ + draws); \ + } \ + static T Modify(T inp) { return inp / 2; } \ + typedef \ + typename std::conditional::value, uint64_t, int64_t>::type \ + ConversionType; \ } #define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \ @@ -923,6 +931,8 @@ struct UniformIntSampleType { static void draw(int64_t N, std::vector* draws) { \ random_real(N, 0, LOWER, UPPER, draws); \ } \ + static T Modify(T inp) { return inp / 2; } \ + typedef double ConversionType; \ } PINT_DECL(UInt8, uint8_t); @@ -940,6 +950,33 @@ PFLOAT_DECL(Double, double, -1000.0, 1000.0); struct PBoolean { PTYPE_DECL(Boolean, uint8_t) + static T Modify(T inp) { return !inp; } + typedef int64_t ConversionType; +}; + +struct PDayTimeInterval { + using DayMilliseconds = DayTimeIntervalType::DayMilliseconds; + PTYPE_DECL(DayTimeInterval, DayMilliseconds); + static void draw(int64_t N, std::vector* draws) { return rand_day_millis(N, draws); } + + static DayMilliseconds Modify(DayMilliseconds inp) { + inp.days /= 2; + return inp; + } + typedef DayMilliseconds ConversionType; +}; + +struct PMonthDayNanoInterval { + using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos; + PTYPE_DECL(MonthDayNanoInterval, MonthDayNanos); + static void draw(int64_t N, std::vector* draws) { + return rand_month_day_nanos(N, draws); + } + static MonthDayNanos Modify(MonthDayNanos inp) { + inp.days /= 2; + return inp; + } + typedef MonthDayNanos ConversionType; }; template <> @@ -952,7 +989,7 @@ void TestPrimitiveBuilder::RandomData(int64_t N, double pct_null) { } template <> -void TestPrimitiveBuilder::FlipValue(T* ptr) { +void TestPrimitiveBuilder::FlipValue(CType* ptr) { *ptr = !*ptr; } @@ -1068,7 +1105,8 @@ TEST(NumericBuilderAccessors, TestSettersGetters) { } typedef ::testing::Types + PInt32, PInt64, PFloat, PDouble, PDayTimeInterval, + PMonthDayNanoInterval> Primitives; TYPED_TEST_SUITE(TestPrimitiveBuilder, Primitives); @@ -1155,12 +1193,13 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendEmptyValue) { // implementation detail: the value slots are 0-initialized for (int64_t i = 0; i < result->length(); ++i) { - ASSERT_EQ(result->Value(i), 0); + typename TestFixture::CType t{}; + ASSERT_EQ(result->Value(i), t); } } TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { - DECL_T(); + typedef typename TestFixture::CType T; int64_t size = 1000; @@ -1190,7 +1229,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { } TYPED_TEST(TestPrimitiveBuilder, Equality) { - DECL_T(); + typedef typename TestFixture::CType T; const int64_t size = 1000; this->RandomData(size); @@ -1226,7 +1265,7 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) { } TYPED_TEST(TestPrimitiveBuilder, SliceEquality) { - DECL_T(); + typedef typename TestFixture::CType T; const int64_t size = 1000; this->RandomData(size); @@ -1259,7 +1298,7 @@ TYPED_TEST(TestPrimitiveBuilder, SliceEquality) { } TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { - DECL_T(); + typedef typename TestFixture::CType T; const int64_t size = 10000; @@ -1315,7 +1354,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { } TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) { - DECL_T(); + typedef typename TestFixture::CType T; int64_t size = 10000; this->RandomData(size); @@ -1351,7 +1390,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) { } TYPED_TEST(TestPrimitiveBuilder, TestTypedFinish) { - DECL_T(); + typedef typename TestFixture::CType T; int64_t size = 1000; this->RandomData(size); @@ -1403,7 +1442,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterNullValid) { } TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { - DECL_T(); + typedef typename TestFixture::CType T; int64_t size = 10000; this->RandomData(size); @@ -1411,7 +1450,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { auto& draws = this->draws_; auto& valid_bytes = this->valid_bytes_; - auto halve = [&draws](int64_t index) { return draws[index] / 2; }; + auto halve = [&draws](int64_t index) { + return TestFixture::TestAttrs::Modify(draws[index]); + }; auto lazy_iter = internal::MakeLazyRange(halve, size); ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(), @@ -1419,7 +1460,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { std::vector halved; transform(draws.begin(), draws.end(), back_inserter(halved), - [](T in) { return in / 2; }); + [](T in) { return TestFixture::TestAttrs::Modify(in); }); std::shared_ptr result; FinishAndCheckPadding(this->builder_.get(), &result); @@ -1433,12 +1474,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { } TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) { - DECL_T(); + typedef typename TestFixture::CType T; // find type we can safely convert the tested values to and from - using conversion_type = - typename std::conditional::value, double, - typename std::conditional::value, - uint64_t, int64_t>::type>::type; + using conversion_type = typename TestFixture::TestAttrs::ConversionType; int64_t size = 10000; this->RandomData(size); @@ -1474,7 +1512,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) { } TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) { - DECL_T(); + typedef typename TestFixture::CType T; int64_t size = 10000; this->RandomData(size); @@ -1493,7 +1531,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) { TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) { // ARROW-1383 - DECL_T(); + typedef typename TestFixture::CType T; int64_t size = 10000; this->RandomData(size); @@ -3161,4 +3199,19 @@ TEST(TestSwapEndianArrayData, ExtensionType) { AssertArrayDataEqualsWithSwapEndian(test_data, expected_data); } +TEST(TestSwapEndianArrayData, MonthDayNanoInterval) { + auto array = ArrayFromJSON(month_day_nano_interval(), R"([[0, 1, 2], + [5000, 200, 3000000000]])"); + auto expected_array = + ArrayFromJSON(month_day_nano_interval(), R"([[0, 16777216, 144115188075855872], + [-2012020736, -939524096, 26688110733557760]])"); + + auto swap_array = MakeArray(*::arrow::internal::SwapEndianArrayData(array->data())); + EXPECT_TRUE(!swap_array->Equals(array)); + ASSERT_ARRAYS_EQUAL(*swap_array, *expected_array); + ASSERT_ARRAYS_EQUAL( + *MakeArray(*::arrow::internal::SwapEndianArrayData(swap_array->data())), *array); + ASSERT_OK(swap_array->ValidateFull()); +} + } // namespace arrow diff --git a/cpp/src/arrow/array/builder_time.h b/cpp/src/arrow/array/builder_time.h index ccd11c2234590..55a7beaaa5067 100644 --- a/cpp/src/arrow/array/builder_time.h +++ b/cpp/src/arrow/array/builder_time.h @@ -40,4 +40,17 @@ class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder(type, pool) {} }; +class ARROW_EXPORT MonthDayNanoIntervalBuilder + : public NumericBuilder { + public: + using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos; + + explicit MonthDayNanoIntervalBuilder(MemoryPool* pool = default_memory_pool()) + : MonthDayNanoIntervalBuilder(month_day_nano_interval(), pool) {} + + explicit MonthDayNanoIntervalBuilder(std::shared_ptr type, + MemoryPool* pool = default_memory_pool()) + : NumericBuilder(type, pool) {} +}; + } // namespace arrow diff --git a/cpp/src/arrow/array/diff.cc b/cpp/src/arrow/array/diff.cc index a94ca178a4097..6d9619bb8aa91 100644 --- a/cpp/src/arrow/array/diff.cc +++ b/cpp/src/arrow/array/diff.cc @@ -460,6 +460,16 @@ class MakeFormatterImpl { return Status::OK(); } + Status Visit(const MonthDayNanoIntervalType&) { + impl_ = [](const Array& array, int64_t index, std::ostream* os) { + auto month_day_nanos = + checked_cast(array).Value(index); + *os << month_day_nanos.months << "m" << month_day_nanos.days << "d" + << month_day_nanos.nanoseconds << "ns"; + }; + return Status::OK(); + } + // format Binary, LargeBinary and FixedSizeBinary in hexadecimal template enable_if_binary_like Visit(const T&) { diff --git a/cpp/src/arrow/array/diff_test.cc b/cpp/src/arrow/array/diff_test.cc index a5022be59d749..c5cf94fc06957 100644 --- a/cpp/src/arrow/array/diff_test.cc +++ b/cpp/src/arrow/array/diff_test.cc @@ -500,6 +500,14 @@ TEST_F(DiffTest, UnifiedDiffFormatter) { -1970-01-02 03:04:05.000678 @@ -4, +3 @@ +1970-01-02 03:04:05.000678 +)"); + + // Month, Day, Nano Intervals + base_ = ArrayFromJSON(month_day_nano_interval(), R"([[2, 3, 1]])"); + target_ = ArrayFromJSON(month_day_nano_interval(), R"([])"); + AssertDiffAndFormat(R"( +@@ -0, +0 @@ +-2m3d1ns )"); // lists diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index 5e95dc93f563f..f12281155b8dc 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -201,6 +201,29 @@ class ArrayDataEndianSwapper { return Status::OK(); } + Status Visit(const MonthDayNanoIntervalType& type) { + using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos; + auto data = reinterpret_cast(data_->buffers[1]->data()); + ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateBuffer(data_->buffers[1]->size())); + auto new_data = reinterpret_cast(new_buffer->mutable_data()); + int64_t length = data_->length; + for (int64_t i = 0; i < length; i++) { + MonthDayNanoIntervalType::MonthDayNanos tmp = data[i]; +#if ARROW_LITTLE_ENDIAN + tmp.months = BitUtil::FromBigEndian(tmp.months); + tmp.days = BitUtil::FromBigEndian(tmp.days); + tmp.nanoseconds = BitUtil::FromBigEndian(tmp.nanoseconds); +#else + tmp.months = BitUtil::FromLittleEndian(tmp.months); + tmp.days = BitUtil::FromLittleEndian(tmp.days); + tmp.nanoseconds = BitUtil::FromLittleEndian(tmp.nanoseconds); +#endif + new_data[i] = tmp; + } + out_->buffers[1] = std::move(new_buffer); + return Status::OK(); + } + Status Visit(const NullType& type) { return Status::OK(); } Status Visit(const BooleanType& type) { return Status::OK(); } Status Visit(const Int8Type& type) { return Status::OK(); } diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index f22228a458897..37cc9e07ad4b6 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -129,6 +129,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, BUILDER_CASE(Timestamp); BUILDER_CASE(MonthInterval); BUILDER_CASE(DayTimeInterval); + BUILDER_CASE(MonthDayNanoInterval); BUILDER_CASE(Boolean); BUILDER_CASE(HalfFloat); BUILDER_CASE(Float); diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc index 3e2e95e5401e9..96d8ba23c389b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc @@ -164,8 +164,10 @@ struct InitStateVisitor { } template - enable_if_t::value && !is_boolean_type::value, Status> Visit( - const Type&) { + enable_if_t::value && !is_boolean_type::value && + !std::is_same::value, + Status> + Visit(const Type&) { return Init::Type>(); } @@ -177,6 +179,10 @@ struct InitStateVisitor { // Handle Decimal128Type, FixedSizeBinaryType Status Visit(const FixedSizeBinaryType& type) { return Init(); } + Status Visit(const MonthDayNanoIntervalType& type) { + return Init(); + } + Result> GetResult() { if (!options.value_set.type()->Equals(arg_type)) { ARROW_ASSIGN_OR_RAISE( @@ -262,8 +268,10 @@ struct IndexInVisitor { } template - enable_if_t::value && !is_boolean_type::value, Status> Visit( - const Type&) { + enable_if_t::value && !is_boolean_type::value && + !std::is_same::value, + Status> + Visit(const Type&) { return ProcessIndexIn< typename UnsignedIntType::Type>(); } @@ -278,6 +286,10 @@ struct IndexInVisitor { return ProcessIndexIn(); } + Status Visit(const MonthDayNanoIntervalType& type) { + return ProcessIndexIn(); + } + Status Execute() { Status s = VisitTypeInline(*data.type, this); if (!s.ok()) { @@ -352,8 +364,10 @@ struct IsInVisitor { } template - enable_if_t::value && !is_boolean_type::value, Status> Visit( - const Type&) { + enable_if_t::value && !is_boolean_type::value && + !std::is_same::value, + Status> + Visit(const Type&) { return ProcessIsIn::Type>(); } @@ -367,6 +381,10 @@ struct IsInVisitor { return ProcessIsIn(); } + Status Visit(const MonthDayNanoIntervalType& type) { + return ProcessIsIn(); + } + Status Execute() { return VisitTypeInline(*data.type, this); } }; @@ -396,6 +414,7 @@ void AddBasicSetLookupKernels(ScalarKernel kernel, AddKernels(BaseBinaryTypes()); AddKernels(NumericTypes()); AddKernels(TemporalTypes()); + AddKernels({month_day_nano_interval()}); std::vector other_types = {Type::BOOL, Type::DECIMAL, Type::FIXED_SIZE_BINARY}; diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc index 67bfe30f3ddf2..284c8ccdebf80 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc @@ -803,6 +803,24 @@ TEST_F(TestIndexInKernel, FixedSizeBinary) { CheckIndexIn(fixed_size_binary(0), R"([])", R"([])", R"([])"); } +TEST_F(TestIndexInKernel, MonthDayNanoInterval) { + auto type = month_day_nano_interval(); + + CheckIndexIn(type, + /*input=*/R"([[5, -1, 5], null, [4, 5, 6], [5, -1, 5], [1, 2, 3]])", + /*value_set=*/R"([null, [4, 5, 6], [5, -1, 5]])", + /*expected=*/R"([2, 0, 1, 2, null])", + /*skip_nulls=*/false); + + // Duplicates in value_set + CheckIndexIn( + type, + /*input=*/R"([[7, 8, 0], null, [0, 0, 0], [7, 8, 0], [0, 0, 1]])", + /*value_set=*/R"([null, null, [0, 0, 0], [0, 0, 0], [7, 8, 0], [7, 8, 0]])", + /*expected=*/R"([4, 0, 2, 4, null])", + /*skip_nulls=*/false); +} + TEST_F(TestIndexInKernel, Decimal) { auto type = decimal(2, 0); diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index 117b82df30d90..34b0f3fba598d 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -410,6 +410,40 @@ class DayTimeIntervalConverter final std::shared_ptr builder_; }; +class MonthDayNanoIntervalConverter final + : public ConcreteConverter { + public: + explicit MonthDayNanoIntervalConverter(const std::shared_ptr& type) { + this->type_ = type; + builder_ = std::make_shared(default_memory_pool()); + } + + Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return this->AppendNull(); + } + MonthDayNanoIntervalType::MonthDayNanos value; + if (!json_obj.IsArray()) { + return JSONTypeError("array", json_obj.GetType()); + } + if (json_obj.Size() != 3) { + return Status::Invalid( + "month_day_nano_interval must have exactly 3 elements, had ", json_obj.Size()); + } + RETURN_NOT_OK(ConvertNumber(json_obj[0], *this->type_, &value.months)); + RETURN_NOT_OK(ConvertNumber(json_obj[1], *this->type_, &value.days)); + RETURN_NOT_OK( + ConvertNumber(json_obj[2], *this->type_, &value.nanoseconds)); + + return builder_->Append(value); + } + + std::shared_ptr builder() override { return builder_; } + + private: + std::shared_ptr builder_; +}; + // ------------------------------------------------------------------------ // Converter for binary and string arrays @@ -856,6 +890,7 @@ Status GetConverter(const std::shared_ptr& type, SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter) SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter) + SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTH_DAY_NANO, MonthDayNanoIntervalConverter) default: return ConversionNotImplemented(type); } diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index 512905dde55f5..ce2c37b79574f 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -475,6 +475,12 @@ TEST(TestDayTimeInterval, Basics) { {{1, -600}, {}}); } +TEST(MonthDayNanoInterval, Basics) { + auto type = month_day_nano_interval(); + AssertJSONArray(type, R"([[1, -600, 5000], null])", + {true, false}, {{1, -600, 5000}, {}}); +} + TEST(TestFixedSizeBinary, Basics) { std::shared_ptr type = fixed_size_binary(3); std::shared_ptr expected, actual; diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index b1b9e56528ead..f7fd46ee87550 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -333,6 +333,10 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data, *out = day_time_interval(); return Status::OK(); } + case flatbuf::IntervalUnit::MONTH_DAY_NANO: { + *out = month_day_nano_interval(); + return Status::OK(); + } } return Status::NotImplemented("Unrecognized interval type."); } @@ -587,6 +591,13 @@ class FieldToFlatbufferVisitor { return Status::OK(); } + Status Visit(const MonthDayNanoIntervalType& type) { + fb_type_ = flatbuf::Type::Interval; + type_offset_ = + flatbuf::CreateInterval(fbb_, flatbuf::IntervalUnit::MONTH_DAY_NANO).Union(); + return Status::OK(); + } + Status Visit(const MonthIntervalType& type) { fb_type_ = flatbuf::Type::Interval; type_offset_ = diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc index ab10238850c97..c93f1f60e6e5a 100644 --- a/cpp/src/arrow/ipc/test_common.cc +++ b/cpp/src/arrow/ipc/test_common.cc @@ -836,12 +836,13 @@ Status MakeIntervals(std::shared_ptr* out) { auto f2 = field("f2", duration(TimeUnit::SECOND)); auto f3 = field("f3", day_time_interval()); auto f4 = field("f4", month_interval()); - auto schema = ::arrow::schema({f0, f1, f2, f3, f4}); + auto f5 = field("f5", month_day_nano_interval()); + auto schema = ::arrow::schema({f0, f1, f2, f3, f4, f5}); std::vector ts_values = {1489269000000, 1489270000000, 1489271000000, 1489272000000, 1489272000000, 1489273000000}; - std::shared_ptr a0, a1, a2, a3, a4; + std::shared_ptr a0, a1, a2, a3, a4, a5; ArrayFromVector(f0->type(), is_valid, ts_values, &a0); ArrayFromVector(f1->type(), is_valid, ts_values, &a1); ArrayFromVector(f2->type(), is_valid, ts_values, &a2); @@ -849,8 +850,11 @@ Status MakeIntervals(std::shared_ptr* out) { f3->type(), is_valid, {{0, 0}, {0, 1}, {1, 1}, {2, 1}, {3, 4}, {-1, -1}}, &a3); ArrayFromVector(f4->type(), is_valid, {0, -1, 1, 2, -2, 24}, &a4); + ArrayFromVector( + f5->type(), is_valid, + {{0, 0, 0}, {0, 0, 1}, {-1, 0, 1}, {-1, -2, -3}, {2, 4, 6}, {-3, -4, -5}}, &a5); - *out = RecordBatch::Make(schema, a0->length(), {a0, a1, a2, a3, a4}); + *out = RecordBatch::Make(schema, a0->length(), {a0, a1, a2, a3, a4, a5}); return Status::OK(); } diff --git a/cpp/src/arrow/json/test_common.h b/cpp/src/arrow/json/test_common.h index 1a1a3bd85d22a..488da071dafe7 100644 --- a/cpp/src/arrow/json/test_common.h +++ b/cpp/src/arrow/json/test_common.h @@ -114,6 +114,8 @@ struct GenerateImpl { Status Visit(const DayTimeIntervalType& t) { return NotImplemented(t); } + Status Visit(const MonthDayNanoIntervalType& t) { return NotImplemented(t); } + Status Visit(const DictionaryType& t) { return NotImplemented(t); } Status Visit(const ExtensionType& t) { return NotImplemented(t); } diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 60cdaf0d6af70..13a3dacac6f70 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -208,6 +208,15 @@ class ArrayPrinter : public PrettyPrinter { return Status::OK(); } + Status WriteDataValues(const MonthDayNanoIntervalArray& array) { + WriteValues(array, [&](int64_t i) { + auto month_day_nanos = array.GetValue(i); + (*sink_) << month_day_nanos.months << "m" << month_day_nanos.days << "d" + << month_day_nanos.nanoseconds << "ns"; + }); + return Status::OK(); + } + Status WriteDataValues(const MonthIntervalArray& array) { const auto data = array.raw_values(); WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; }); diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index 538e736518527..e06efa20ae106 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -278,6 +278,37 @@ TEST_F(TestPrettyPrint, DateTimeTypes) { } } +TEST_F(TestPrettyPrint, TestIntervalTypes) { + std::vector is_valid = {true, true, false, true, false}; + + { + std::vector values = { + {1, 2}, {-3, 4}, {}, {}, {}}; + static const char* expected = R"expected([ + 1d2ms, + -3d4ms, + null, + 0d0ms, + null +])expected"; + CheckPrimitive( + {0, 10}, is_valid, values, expected); + } + { + std::vector values = { + {1, 2, 3}, {-3, 4, -5}, {}, {}, {}}; + static const char* expected = R"expected([ + 1m2d3ns, + -3m4d-5ns, + null, + 0m0d0ns, + null +])expected"; + CheckPrimitive( + {0, 10}, is_valid, values, expected); + } +} + TEST_F(TestPrettyPrint, StructTypeBasic) { auto simple_1 = field("one", int32()); auto simple_2 = field("two", int32()); diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index dc3f6ffc23021..60ba54f82cc11 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -70,7 +70,11 @@ struct ScalarHashImpl { } Status Visit(const DayTimeIntervalScalar& s) { - return StdHash(s.value.days) & StdHash(s.value.days); + return StdHash(s.value.days) & StdHash(s.value.milliseconds); + } + + Status Visit(const MonthDayNanoIntervalScalar& s) { + return StdHash(s.value.days) & StdHash(s.value.months) & StdHash(s.value.nanoseconds); } Status Visit(const Decimal128Scalar& s) { @@ -781,7 +785,8 @@ Status CastImpl(const BooleanScalar& from, NumericScalar* to) { // numeric to temporal template typename std::enable_if::value && - !std::is_same::value, + !std::is_same::value && + !std::is_same::value, Status>::type CastImpl(const NumericScalar& from, TemporalScalar* to) { to->value = static_cast(from.value); @@ -791,7 +796,8 @@ CastImpl(const NumericScalar& from, TemporalScalar* to) { // temporal to numeric template typename std::enable_if::value && - !std::is_same::value, + !std::is_same::value && + !std::is_same::value, Status>::type CastImpl(const TemporalScalar& from, NumericScalar* to) { to->value = static_cast(from.value); diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h index 7fd48be86f45a..81d9f2dc80eef 100644 --- a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -348,6 +348,11 @@ struct ARROW_EXPORT DayTimeIntervalScalar : public IntervalScalar::IntervalScalar; }; +struct ARROW_EXPORT MonthDayNanoIntervalScalar + : public IntervalScalar { + using IntervalScalar::IntervalScalar; +}; + struct ARROW_EXPORT DurationScalar : public TemporalScalar { using TemporalScalar::TemporalScalar; }; diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index 6147201a32eb0..3e7c9b78c6b24 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -96,7 +96,8 @@ std::vector AllTypeIds() { Type::DENSE_UNION, Type::SPARSE_UNION, Type::DICTIONARY, - Type::EXTENSION}; + Type::EXTENSION, + Type::INTERVAL_MONTH_DAY_NANO}; } template diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h index 55734d26aff11..13bab35a66dfd 100644 --- a/cpp/src/arrow/testing/gtest_util.h +++ b/cpp/src/arrow/testing/gtest_util.h @@ -32,6 +32,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/array/builder_primitive.h" +#include "arrow/array/builder_time.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/testing/gtest_compat.h" diff --git a/cpp/src/arrow/testing/json_internal.cc b/cpp/src/arrow/testing/json_internal.cc index e6a5723840ab1..5e5d67a3fc6e5 100644 --- a/cpp/src/arrow/testing/json_internal.cc +++ b/cpp/src/arrow/testing/json_internal.cc @@ -68,7 +68,10 @@ constexpr char kDays[] = "days"; constexpr char kDayTime[] = "DAY_TIME"; constexpr char kDuration[] = "duration"; constexpr char kMilliseconds[] = "milliseconds"; +constexpr char kMonths[] = "months"; +constexpr char kNanoseconds[] = "nanoseconds"; constexpr char kYearMonth[] = "YEAR_MONTH"; +constexpr char kMonthDayNano[] = "MONTH_DAY_NANO"; std::string GetFloatingPrecisionName(FloatingPointType::Precision precision) { switch (precision) { @@ -252,6 +255,9 @@ class SchemaWriter { case IntervalType::DAY_TIME: writer_->String(kDayTime); break; + case IntervalType::MONTH_DAY_NANO: + writer_->String(kMonthDayNano); + break; } } @@ -389,6 +395,9 @@ class SchemaWriter { Status Visit(const TimestampType& type) { return WritePrimitive("timestamp", type); } Status Visit(const DurationType& type) { return WritePrimitive(kDuration, type); } Status Visit(const MonthIntervalType& type) { return WritePrimitive("interval", type); } + Status Visit(const MonthDayNanoIntervalType& type) { + return WritePrimitive("interval", type); + } Status Visit(const DayTimeIntervalType& type) { return WritePrimitive("interval", type); @@ -535,6 +544,22 @@ class ArrayWriter { } } + void WriteDataValues(const MonthDayNanoIntervalArray& arr) { + for (int64_t i = 0; i < arr.length(); ++i) { + writer_->StartObject(); + if (arr.IsValid(i)) { + const MonthDayNanoIntervalType::MonthDayNanos dm = arr.GetValue(i); + writer_->Key(kMonths); + writer_->Int(dm.months); + writer_->Key(kDays); + writer_->Int(dm.days); + writer_->Key(kNanoseconds); + writer_->Int64(dm.nanoseconds); + } + writer_->EndObject(); + } + } + void WriteDataValues(const DayTimeIntervalArray& arr) { for (int64_t i = 0; i < arr.length(); ++i) { writer_->StartObject(); @@ -936,6 +961,8 @@ Status GetInterval(const RjObject& json_type, std::shared_ptr* type) { *type = day_time_interval(); } else if (unit_str == kYearMonth) { *type = month_interval(); + } else if (unit_str == kMonthDayNano) { + *type = month_day_nano_interval(); } else { return Status::Invalid("Invalid interval unit: " + unit_str); } @@ -1312,6 +1339,28 @@ class ArrayReader { return FinishBuilder(&builder); } + Status Visit(const MonthDayNanoIntervalType& type) { + MonthDayNanoIntervalBuilder builder(pool_); + + ARROW_ASSIGN_OR_RAISE(const auto json_data_arr, GetDataArray(obj_)); + + for (int i = 0; i < length_; ++i) { + if (!is_valid_[i]) { + RETURN_NOT_OK(builder.AppendNull()); + continue; + } + + const rj::Value& val = json_data_arr[i]; + DCHECK(val.IsObject()); + MonthDayNanoIntervalType::MonthDayNanos dm = {0, 0, 0}; + dm.months = val[kMonths].GetInt(); + dm.days = val[kDays].GetInt(); + dm.nanoseconds = val[kNanoseconds].GetInt64(); + RETURN_NOT_OK(builder.Append(dm)); + } + return FinishBuilder(&builder); + } + template enable_if_t::value && !is_decimal_type::value, Status> Visit(const T& type) { diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc index bf95ea5e05148..cd3385e5aeeb1 100644 --- a/cpp/src/arrow/testing/random.cc +++ b/cpp/src/arrow/testing/random.cc @@ -647,7 +647,9 @@ struct RandomArrayGeneratorOfImpl { } template - enable_if_t::value && !std::is_same::value, + enable_if_t::value && + !std::is_same::value && + !std::is_same::value, Status> Visit(const T&) { auto max = std::numeric_limits::max(); @@ -867,6 +869,10 @@ std::shared_ptr RandomArrayGenerator::ArrayOf(const Field& field, int64_t // This isn't as flexible as it could be, but the array-of-structs layout of this // type means it's not a (useful) composition of other generators GENERATE_INTEGRAL_CASE_VIEW(Int64Type, DayTimeIntervalType); + case Type::type::INTERVAL_MONTH_DAY_NANO: { + return *FixedSizeBinary(length, /*byte_width=*/16, null_probability) + ->View(month_day_nano_interval()); + } GENERATE_LIST_CASE(ListArray); @@ -998,6 +1004,36 @@ std::shared_ptr GenerateBatch(const FieldVector& fields, int64_t length, SeedType seed) { return RandomArrayGenerator(seed).BatchOf(fields, length); } - } // namespace random + +void rand_day_millis(int64_t N, std::vector* out) { + const int random_seed = 0; + arrow::random::pcg32_fast gen(random_seed); + std::uniform_int_distribution d(std::numeric_limits::min(), + std::numeric_limits::max()); + out->resize(N, {}); + std::generate(out->begin(), out->end(), [&d, &gen] { + DayTimeIntervalType::DayMilliseconds tmp; + tmp.days = d(gen); + tmp.milliseconds = d(gen); + return tmp; + }); +} + +void rand_month_day_nanos(int64_t N, + std::vector* out) { + const int random_seed = 0; + arrow::random::pcg32_fast gen(random_seed); + std::uniform_int_distribution d(std::numeric_limits::min(), + std::numeric_limits::max()); + out->resize(N, {}); + std::generate(out->begin(), out->end(), [&d, &gen] { + MonthDayNanoIntervalType::MonthDayNanos tmp; + tmp.months = static_cast(d(gen)); + tmp.days = static_cast(d(gen)); + tmp.nanoseconds = d(gen); + return tmp; + }); +} + } // namespace arrow diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h index e9b6e426fbcda..c77ae2525c44f 100644 --- a/cpp/src/arrow/testing/random.h +++ b/cpp/src/arrow/testing/random.h @@ -454,6 +454,12 @@ std::shared_ptr GenerateArray(const Field& field, int64_t size, // Assorted functions // +ARROW_TESTING_EXPORT +void rand_day_millis(int64_t N, std::vector* out); +ARROW_TESTING_EXPORT +void rand_month_day_nanos(int64_t N, + std::vector* out); + template void randint(int64_t N, T lower, T upper, std::vector* out) { const int random_seed = 0; diff --git a/cpp/src/arrow/testing/random_test.cc b/cpp/src/arrow/testing/random_test.cc index 553028f8fb3f0..002c6c9b7e800 100644 --- a/cpp/src/arrow/testing/random_test.cc +++ b/cpp/src/arrow/testing/random_test.cc @@ -108,8 +108,9 @@ auto values = ::testing::Values( field("time32ms", time32(TimeUnit::MILLI)), field("time64ns", time64(TimeUnit::NANO)), field("time32s", time32(TimeUnit::SECOND)), field("time64us", time64(TimeUnit::MICRO)), field("month_interval", month_interval()), - field("daytime_interval", day_time_interval()), field("listint8", list(int8())), - field("listlistint8", list(list(int8()))), + field("daytime_interval", day_time_interval()), + field("month_day_nano_interval", month_day_nano_interval()), + field("listint8", list(int8())), field("listlistint8", list(list(int8()))), field("listint8emptynulls", list(int8()), true, key_value_metadata({{"force_empty_nulls", "true"}})), field("listint81024values", list(int8()), true, diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index e16b26e2465b6..d2adbf04b15d7 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -138,6 +138,7 @@ std::string ToString(Type::type id) { TO_STRING_CASE(TIME64) TO_STRING_CASE(TIMESTAMP) TO_STRING_CASE(INTERVAL_DAY_TIME) + TO_STRING_CASE(INTERVAL_MONTH_DAY_NANO) TO_STRING_CASE(INTERVAL_MONTHS) TO_STRING_CASE(DURATION) TO_STRING_CASE(STRING) @@ -1811,6 +1812,8 @@ static char IntervalTypeFingerprint(IntervalType::type unit) { return 'd'; case IntervalType::MONTHS: return 'M'; + case IntervalType::MONTH_DAY_NANO: + return 'N'; default: DCHECK(false) << "Unexpected IntervalType::type"; return '\0'; @@ -2118,6 +2121,10 @@ std::shared_ptr day_time_interval() { return std::make_shared(); } +std::shared_ptr month_day_nano_interval() { + return std::make_shared(); +} + std::shared_ptr month_interval() { return std::make_shared(); } diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 8eb6d87c3ee9f..337d4cd64a147 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -1250,7 +1250,7 @@ class ARROW_EXPORT TimestampType : public TemporalType, public ParametricType { // Base class for the different kinds of calendar intervals. class ARROW_EXPORT IntervalType : public TemporalType, public ParametricType { public: - enum type { MONTHS, DAY_TIME }; + enum type { MONTHS, DAY_TIME, MONTH_DAY_NANO }; virtual type interval_type() const = 0; @@ -1317,6 +1317,43 @@ class ARROW_EXPORT DayTimeIntervalType : public IntervalType { std::string name() const override { return "day_time_interval"; } }; +/// \brief Represents a number of months, days and nanoseconds between +/// two dates. +/// +/// All fields are independent from one another. +class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType { + public: + struct MonthDayNanos { + int32_t months; + int32_t days; + int64_t nanoseconds; + bool operator==(MonthDayNanos other) const { + return this->months == other.months && this->days == other.days && + this->nanoseconds == other.nanoseconds; + } + bool operator!=(MonthDayNanos other) const { return !(*this == other); } + }; + using c_type = MonthDayNanos; + using PhysicalType = MonthDayNanoIntervalType; + + static_assert(sizeof(MonthDayNanos) == 16, + "MonthDayNanos struct assumed to be of size 16 bytes"); + static constexpr Type::type type_id = Type::INTERVAL_MONTH_DAY_NANO; + + static constexpr const char* type_name() { return "month_day_nano_interval"; } + + IntervalType::type interval_type() const override { + return IntervalType::MONTH_DAY_NANO; + } + + MonthDayNanoIntervalType() : IntervalType(type_id) {} + + int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } + + std::string ToString() const override { return name(); } + std::string name() const override { return "month_day_nano_interval"; } +}; + /// \brief Represents an elapsed time without any relation to a calendar artifact. class ARROW_EXPORT DurationType : public TemporalType, public ParametricType { public: diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index d77f519a3c5a3..7c52c87eb71d2 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -253,6 +253,11 @@ class DayTimeIntervalArray; class DayTimeIntervalBuilder; struct DayTimeIntervalScalar; +class MonthDayNanoIntervalType; +class MonthDayNanoIntervalArray; +class MonthDayNanoIntervalBuilder; +struct MonthDayNanoIntervalScalar; + class DurationType; using DurationArray = NumericArray; using DurationBuilder = NumericBuilder; @@ -394,6 +399,9 @@ struct Type { /// Like LIST, but with 64-bit offsets LARGE_LIST, + /// Calendar interval type with three fields. + INTERVAL_MONTH_DAY_NANO, + // Leave this at the end MAX_ID }; @@ -511,6 +519,9 @@ std::shared_ptr ARROW_EXPORT day_time_interval(); /// \brief Return a MonthIntervalType instance std::shared_ptr ARROW_EXPORT month_interval(); +/// \brief Return a MonthDayNanoIntervalType instance +std::shared_ptr ARROW_EXPORT month_day_nano_interval(); + /// \brief Create a TimestampType instance from its unit ARROW_EXPORT std::shared_ptr timestamp(TimeUnit::type unit); diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index fd7fd01af67fa..f8294fc6d69c4 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -1347,6 +1347,23 @@ TEST(TestDayTimeIntervalType, ToString) { ASSERT_EQ("day_time_interval", t1->ToString()); } +TEST(TestMonthDayNanoIntervalType, Equals) { + MonthDayNanoIntervalType t1; + MonthDayNanoIntervalType t2; + MonthIntervalType t3; + DayTimeIntervalType t4; + + AssertTypeEqual(t1, t2); + AssertTypeNotEqual(t1, t3); + AssertTypeNotEqual(t1, t4); +} + +TEST(TestMonthDayNanoIntervalType, ToString) { + auto t1 = month_day_nano_interval(); + + ASSERT_EQ("month_day_nano_interval", t1->ToString()); +} + TEST(TestDurationType, Equals) { DurationType t1; DurationType t2; diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index f94ba513e6ac2..02337cf11784d 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -64,6 +64,7 @@ TYPE_ID_TRAIT(TIME32, Time32Type) TYPE_ID_TRAIT(TIME64, Time64Type) TYPE_ID_TRAIT(TIMESTAMP, TimestampType) TYPE_ID_TRAIT(INTERVAL_DAY_TIME, DayTimeIntervalType) +TYPE_ID_TRAIT(INTERVAL_MONTH_DAY_NANO, MonthDayNanoIntervalType) TYPE_ID_TRAIT(INTERVAL_MONTHS, MonthIntervalType) TYPE_ID_TRAIT(DURATION, DurationType) TYPE_ID_TRAIT(DECIMAL128, Decimal128Type) @@ -228,6 +229,20 @@ struct TypeTraits { static std::shared_ptr type_singleton() { return day_time_interval(); } }; +template <> +struct TypeTraits { + using ArrayType = MonthDayNanoIntervalArray; + using BuilderType = MonthDayNanoIntervalBuilder; + using ScalarType = MonthDayNanoIntervalScalar; + + static constexpr int64_t bytes_required(int64_t elements) { + return elements * + static_cast(sizeof(MonthDayNanoIntervalType::MonthDayNanos)); + } + constexpr static bool is_parameter_free = true; + static std::shared_ptr type_singleton() { return month_day_nano_interval(); } +}; + template <> struct TypeTraits { using ArrayType = MonthIntervalArray; @@ -880,6 +895,7 @@ static inline bool is_primitive(Type::type type_id) { case Type::TIMESTAMP: case Type::DURATION: case Type::INTERVAL_MONTHS: + case Type::INTERVAL_MONTH_DAY_NANO: case Type::INTERVAL_DAY_TIME: return true; default: @@ -977,6 +993,8 @@ static inline int bit_width(Type::type type_id) { return 32; case Type::INTERVAL_DAY_TIME: return 64; + case Type::INTERVAL_MONTH_DAY_NANO: + return 128; case Type::DECIMAL128: return 128; diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc index 851785081c792..1f2771bc2e52c 100644 --- a/cpp/src/arrow/visitor.cc +++ b/cpp/src/arrow/visitor.cc @@ -56,6 +56,7 @@ ARRAY_VISITOR_DEFAULT(Time32Array) ARRAY_VISITOR_DEFAULT(Time64Array) ARRAY_VISITOR_DEFAULT(TimestampArray) ARRAY_VISITOR_DEFAULT(DayTimeIntervalArray) +ARRAY_VISITOR_DEFAULT(MonthDayNanoIntervalArray) ARRAY_VISITOR_DEFAULT(MonthIntervalArray) ARRAY_VISITOR_DEFAULT(DurationArray) ARRAY_VISITOR_DEFAULT(ListArray) @@ -104,6 +105,7 @@ TYPE_VISITOR_DEFAULT(Time32Type) TYPE_VISITOR_DEFAULT(Time64Type) TYPE_VISITOR_DEFAULT(TimestampType) TYPE_VISITOR_DEFAULT(DayTimeIntervalType) +TYPE_VISITOR_DEFAULT(MonthDayNanoIntervalType) TYPE_VISITOR_DEFAULT(MonthIntervalType) TYPE_VISITOR_DEFAULT(DurationType) TYPE_VISITOR_DEFAULT(Decimal128Type) @@ -153,6 +155,7 @@ SCALAR_VISITOR_DEFAULT(Time32Scalar) SCALAR_VISITOR_DEFAULT(Time64Scalar) SCALAR_VISITOR_DEFAULT(TimestampScalar) SCALAR_VISITOR_DEFAULT(DayTimeIntervalScalar) +SCALAR_VISITOR_DEFAULT(MonthDayNanoIntervalScalar) SCALAR_VISITOR_DEFAULT(MonthIntervalScalar) SCALAR_VISITOR_DEFAULT(DurationScalar) SCALAR_VISITOR_DEFAULT(Decimal128Scalar) diff --git a/cpp/src/arrow/visitor.h b/cpp/src/arrow/visitor.h index 0382e461199c7..18a5c7db0575d 100644 --- a/cpp/src/arrow/visitor.h +++ b/cpp/src/arrow/visitor.h @@ -51,6 +51,7 @@ class ARROW_EXPORT ArrayVisitor { virtual Status Visit(const Time64Array& array); virtual Status Visit(const TimestampArray& array); virtual Status Visit(const DayTimeIntervalArray& array); + virtual Status Visit(const MonthDayNanoIntervalArray& array); virtual Status Visit(const MonthIntervalArray& array); virtual Status Visit(const DurationArray& array); virtual Status Visit(const Decimal128Array& array); @@ -93,6 +94,7 @@ class ARROW_EXPORT TypeVisitor { virtual Status Visit(const Time32Type& type); virtual Status Visit(const Time64Type& type); virtual Status Visit(const TimestampType& type); + virtual Status Visit(const MonthDayNanoIntervalType& type); virtual Status Visit(const MonthIntervalType& type); virtual Status Visit(const DayTimeIntervalType& type); virtual Status Visit(const DurationType& type); @@ -137,6 +139,7 @@ class ARROW_EXPORT ScalarVisitor { virtual Status Visit(const Time64Scalar& scalar); virtual Status Visit(const TimestampScalar& scalar); virtual Status Visit(const DayTimeIntervalScalar& scalar); + virtual Status Visit(const MonthDayNanoIntervalScalar& type); virtual Status Visit(const MonthIntervalScalar& scalar); virtual Status Visit(const DurationScalar& scalar); virtual Status Visit(const Decimal128Scalar& scalar); diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h index 132c35aeaa134..3321605ae7b82 100644 --- a/cpp/src/arrow/visitor_inline.h +++ b/cpp/src/arrow/visitor_inline.h @@ -65,6 +65,7 @@ namespace arrow { ACTION(Timestamp); \ ACTION(Time32); \ ACTION(Time64); \ + ACTION(MonthDayNanoInterval); \ ACTION(MonthInterval); \ ACTION(DayTimeInterval); \ ACTION(Decimal128); \ diff --git a/cpp/src/generated/File_generated.h b/cpp/src/generated/File_generated.h index 06953c4a04044..fe6578e538c46 100644 --- a/cpp/src/generated/File_generated.h +++ b/cpp/src/generated/File_generated.h @@ -14,7 +14,6 @@ namespace arrow { namespace flatbuf { struct Footer; -struct FooterBuilder; struct Block; @@ -56,7 +55,6 @@ FLATBUFFERS_STRUCT_END(Block, 24); /// Arrow File metadata /// struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FooterBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VERSION = 4, VT_SCHEMA = 6, @@ -97,7 +95,6 @@ struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct FooterBuilder { - typedef Footer Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_version(org::apache::arrow::flatbuf::MetadataVersion version) { diff --git a/cpp/src/generated/Message_generated.h b/cpp/src/generated/Message_generated.h index 822bec9952ba2..b08dcfd210c6b 100644 --- a/cpp/src/generated/Message_generated.h +++ b/cpp/src/generated/Message_generated.h @@ -18,16 +18,12 @@ namespace flatbuf { struct FieldNode; struct BodyCompression; -struct BodyCompressionBuilder; struct RecordBatch; -struct RecordBatchBuilder; struct DictionaryBatch; -struct DictionaryBatchBuilder; struct Message; -struct MessageBuilder; enum class CompressionType : int8_t { LZ4_FRAME = 0, @@ -179,7 +175,7 @@ bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffer /// Metadata about a field at some level of a nested type tree (but not /// its children). /// -/// For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` /// would have {length: 5, null_count: 2} for its List node, and {length: 6, /// null_count: 0} for its Int16 node, as separate FieldNode structs FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS { @@ -213,7 +209,6 @@ FLATBUFFERS_STRUCT_END(FieldNode, 16); /// bodies. Intended for use with RecordBatch but could be used for other /// message types struct BodyCompression FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef BodyCompressionBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_CODEC = 4, VT_METHOD = 6 @@ -235,7 +230,6 @@ struct BodyCompression FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct BodyCompressionBuilder { - typedef BodyCompression Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_codec(org::apache::arrow::flatbuf::CompressionType codec) { @@ -270,7 +264,6 @@ inline flatbuffers::Offset CreateBodyCompression( /// batch. Some systems call this a "row batch" internally and others a "record /// batch". struct RecordBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef RecordBatchBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_LENGTH = 4, VT_NODES = 6, @@ -313,7 +306,6 @@ struct RecordBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct RecordBatchBuilder { - typedef RecordBatch Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_length(int64_t length) { @@ -377,7 +369,6 @@ inline flatbuffers::Offset CreateRecordBatchDirect( /// may be spread across multiple dictionary batches by using the isDelta /// flag struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef DictionaryBatchBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_ID = 4, VT_DATA = 6, @@ -406,7 +397,6 @@ struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct DictionaryBatchBuilder { - typedef DictionaryBatch Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_id(int64_t id) { @@ -443,7 +433,6 @@ inline flatbuffers::Offset CreateDictionaryBatch( } struct Message FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef MessageBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VERSION = 4, VT_HEADER_TYPE = 6, @@ -517,7 +506,6 @@ template<> inline const org::apache::arrow::flatbuf::SparseTensor *Message::head } struct MessageBuilder { - typedef Message Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_version(org::apache::arrow::flatbuf::MetadataVersion version) { diff --git a/cpp/src/generated/Schema_generated.h b/cpp/src/generated/Schema_generated.h index 91e01d337588e..1768b688e6aa6 100644 --- a/cpp/src/generated/Schema_generated.h +++ b/cpp/src/generated/Schema_generated.h @@ -12,81 +12,56 @@ namespace arrow { namespace flatbuf { struct Null; -struct NullBuilder; struct Struct_; -struct Struct_Builder; struct List; -struct ListBuilder; struct LargeList; -struct LargeListBuilder; struct FixedSizeList; -struct FixedSizeListBuilder; struct Map; -struct MapBuilder; struct Union; -struct UnionBuilder; struct Int; -struct IntBuilder; struct FloatingPoint; -struct FloatingPointBuilder; struct Utf8; -struct Utf8Builder; struct Binary; -struct BinaryBuilder; struct LargeUtf8; -struct LargeUtf8Builder; struct LargeBinary; -struct LargeBinaryBuilder; struct FixedSizeBinary; -struct FixedSizeBinaryBuilder; struct Bool; -struct BoolBuilder; struct Decimal; -struct DecimalBuilder; struct Date; -struct DateBuilder; struct Time; -struct TimeBuilder; struct Timestamp; -struct TimestampBuilder; struct Interval; -struct IntervalBuilder; struct Duration; -struct DurationBuilder; struct KeyValue; -struct KeyValueBuilder; struct DictionaryEncoding; -struct DictionaryEncodingBuilder; struct Field; -struct FieldBuilder; struct Buffer; struct Schema; -struct SchemaBuilder; enum class MetadataVersion : int16_t { /// 0.1.0 (October 2016). @@ -326,29 +301,32 @@ inline const char *EnumNameTimeUnit(TimeUnit e) { enum class IntervalUnit : int16_t { YEAR_MONTH = 0, DAY_TIME = 1, + MONTH_DAY_NANO = 2, MIN = YEAR_MONTH, - MAX = DAY_TIME + MAX = MONTH_DAY_NANO }; -inline const IntervalUnit (&EnumValuesIntervalUnit())[2] { +inline const IntervalUnit (&EnumValuesIntervalUnit())[3] { static const IntervalUnit values[] = { IntervalUnit::YEAR_MONTH, - IntervalUnit::DAY_TIME + IntervalUnit::DAY_TIME, + IntervalUnit::MONTH_DAY_NANO }; return values; } inline const char * const *EnumNamesIntervalUnit() { - static const char * const names[3] = { + static const char * const names[4] = { "YEAR_MONTH", "DAY_TIME", + "MONTH_DAY_NANO", nullptr }; return names; } inline const char *EnumNameIntervalUnit(IntervalUnit e) { - if (flatbuffers::IsOutRange(e, IntervalUnit::YEAR_MONTH, IntervalUnit::DAY_TIME)) return ""; + if (flatbuffers::IsOutRange(e, IntervalUnit::YEAR_MONTH, IntervalUnit::MONTH_DAY_NANO)) return ""; const size_t index = static_cast(e); return EnumNamesIntervalUnit()[index]; } @@ -634,7 +612,6 @@ FLATBUFFERS_STRUCT_END(Buffer, 16); /// These are stored in the flatbuffer in the Type union below struct Null FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef NullBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -642,7 +619,6 @@ struct Null FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct NullBuilder { - typedef Null Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit NullBuilder(flatbuffers::FlatBufferBuilder &_fbb) @@ -667,7 +643,6 @@ inline flatbuffers::Offset CreateNull( /// (according to the physical memory layout). We used Struct_ here as /// Struct is a reserved word in Flatbuffers struct Struct_ FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Struct_Builder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -675,7 +650,6 @@ struct Struct_ FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct Struct_Builder { - typedef Struct_ Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit Struct_Builder(flatbuffers::FlatBufferBuilder &_fbb) @@ -697,7 +671,6 @@ inline flatbuffers::Offset CreateStruct_( } struct List FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ListBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -705,7 +678,6 @@ struct List FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct ListBuilder { - typedef List Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit ListBuilder(flatbuffers::FlatBufferBuilder &_fbb) @@ -729,7 +701,6 @@ inline flatbuffers::Offset CreateList( /// Same as List, but with 64-bit offsets, allowing to represent /// extremely large data values. struct LargeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LargeListBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -737,7 +708,6 @@ struct LargeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct LargeListBuilder { - typedef LargeList Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LargeListBuilder(flatbuffers::FlatBufferBuilder &_fbb) @@ -759,7 +729,6 @@ inline flatbuffers::Offset CreateLargeList( } struct FixedSizeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FixedSizeListBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_LISTSIZE = 4 }; @@ -775,7 +744,6 @@ struct FixedSizeList FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct FixedSizeListBuilder { - typedef FixedSizeList Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_listSize(int32_t listSize) { @@ -816,17 +784,17 @@ inline flatbuffers::Offset CreateFixedSizeList( /// not enforced. /// /// Map +/// ```text /// - child[0] entries: Struct /// - child[0] key: K /// - child[1] value: V -/// +/// ``` /// Neither the "entries" field nor the "key" field may be nullable. /// /// The metadata is structured so that Arrow systems without special handling /// for Map can make Map an alias for List. The "layout" attribute for the Map /// field must have the same contents as a List. struct Map FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef MapBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_KEYSSORTED = 4 }; @@ -842,7 +810,6 @@ struct Map FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct MapBuilder { - typedef Map Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_keysSorted(bool keysSorted) { @@ -871,9 +838,8 @@ inline flatbuffers::Offset CreateMap( /// A union is a complex type with children in Field /// By default ids in the type vector refer to the offsets in the children /// optionally typeIds provides an indirection between the child offset and the type id -/// for each child typeIds[offset] is the id used in the type vector +/// for each child `typeIds[offset]` is the id used in the type vector struct Union FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef UnionBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_MODE = 4, VT_TYPEIDS = 6 @@ -894,7 +860,6 @@ struct Union FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct UnionBuilder { - typedef Union Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_mode(org::apache::arrow::flatbuf::UnionMode mode) { @@ -937,7 +902,6 @@ inline flatbuffers::Offset CreateUnionDirect( } struct Int FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef IntBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BITWIDTH = 4, VT_IS_SIGNED = 6 @@ -957,7 +921,6 @@ struct Int FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct IntBuilder { - typedef Int Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_bitWidth(int32_t bitWidth) { @@ -989,7 +952,6 @@ inline flatbuffers::Offset CreateInt( } struct FloatingPoint FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FloatingPointBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PRECISION = 4 }; @@ -1004,7 +966,6 @@ struct FloatingPoint FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct FloatingPointBuilder { - typedef FloatingPoint Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_precision(org::apache::arrow::flatbuf::Precision precision) { @@ -1032,7 +993,6 @@ inline flatbuffers::Offset CreateFloatingPoint( /// Unicode with UTF-8 encoding struct Utf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Utf8Builder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -1040,7 +1000,6 @@ struct Utf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct Utf8Builder { - typedef Utf8 Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit Utf8Builder(flatbuffers::FlatBufferBuilder &_fbb) @@ -1063,7 +1022,6 @@ inline flatbuffers::Offset CreateUtf8( /// Opaque binary data struct Binary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef BinaryBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -1071,7 +1029,6 @@ struct Binary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct BinaryBuilder { - typedef Binary Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit BinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb) @@ -1095,7 +1052,6 @@ inline flatbuffers::Offset CreateBinary( /// Same as Utf8, but with 64-bit offsets, allowing to represent /// extremely large data values. struct LargeUtf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LargeUtf8Builder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -1103,7 +1059,6 @@ struct LargeUtf8 FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct LargeUtf8Builder { - typedef LargeUtf8 Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LargeUtf8Builder(flatbuffers::FlatBufferBuilder &_fbb) @@ -1127,7 +1082,6 @@ inline flatbuffers::Offset CreateLargeUtf8( /// Same as Binary, but with 64-bit offsets, allowing to represent /// extremely large data values. struct LargeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LargeBinaryBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -1135,7 +1089,6 @@ struct LargeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct LargeBinaryBuilder { - typedef LargeBinary Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit LargeBinaryBuilder(flatbuffers::FlatBufferBuilder &_fbb) @@ -1157,7 +1110,6 @@ inline flatbuffers::Offset CreateLargeBinary( } struct FixedSizeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FixedSizeBinaryBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BYTEWIDTH = 4 }; @@ -1173,7 +1125,6 @@ struct FixedSizeBinary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct FixedSizeBinaryBuilder { - typedef FixedSizeBinary Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_byteWidth(int32_t byteWidth) { @@ -1200,7 +1151,6 @@ inline flatbuffers::Offset CreateFixedSizeBinary( } struct Bool FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef BoolBuilder Builder; bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); @@ -1208,7 +1158,6 @@ struct Bool FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct BoolBuilder { - typedef Bool Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit BoolBuilder(flatbuffers::FlatBufferBuilder &_fbb) @@ -1230,11 +1179,10 @@ inline flatbuffers::Offset CreateBool( } /// Exact decimal value represented as an integer value in two's -/// complement. Currently only 128-bit (16-byte) integers are used but this may -/// be expanded in the future. The representation uses the endianness indicated +/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated /// in the Schema. struct Decimal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef DecimalBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PRECISION = 4, VT_SCALE = 6, @@ -1248,10 +1196,8 @@ struct Decimal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { int32_t scale() const { return GetField(VT_SCALE, 0); } - /// Number of bits per value. The only accepted width right now is 128 but - /// this field exists for forward compatibility so that other bit widths may - /// be supported in future format versions. We use bitWidth for consistency - /// with Int::bitWidth. + /// Number of bits per value. The only accepted widths are 128 and 256. + /// We use bitWidth for consistency with Int::bitWidth. int32_t bitWidth() const { return GetField(VT_BITWIDTH, 128); } @@ -1265,7 +1211,6 @@ struct Decimal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct DecimalBuilder { - typedef Decimal Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_precision(int32_t precision) { @@ -1308,7 +1253,6 @@ inline flatbuffers::Offset CreateDecimal( /// leap seconds), where the values are evenly divisible by 86400000 /// * Days (32 bits) since the UNIX epoch struct Date FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef DateBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_UNIT = 4 }; @@ -1323,7 +1267,6 @@ struct Date FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct DateBuilder { - typedef Date Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_unit(org::apache::arrow::flatbuf::DateUnit unit) { @@ -1353,7 +1296,6 @@ inline flatbuffers::Offset CreateDate( /// - SECOND and MILLISECOND: 32 bits /// - MICROSECOND and NANOSECOND: 64 bits struct Time FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef TimeBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_UNIT = 4, VT_BITWIDTH = 6 @@ -1373,7 +1315,6 @@ struct Time FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { }; struct TimeBuilder { - typedef Time Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_unit(org::apache::arrow::flatbuf::TimeUnit unit) { @@ -1408,10 +1349,34 @@ inline flatbuffers::Offset