Skip to content

Commit

Permalink
ARROW-13628: [Format][C++][Java] Add MONTH_DAY_NANO interval type
Browse files Browse the repository at this point in the history
Trying to formalize [mailing list discussion](https://lists.apache.org/thread.html/rd919c4ed8ad2f2827a2d4f665d8da99e545ba92ef992b2e557831751%40%3Cdev.arrow.apache.org%3E)

Closes apache#10177 from emkornfield/interval

Lead-authored-by: Micah Kornfield <[email protected]>
Co-authored-by: emkornfield <[email protected]>
Co-authored-by: emkornfield <[email protected]>
Signed-off-by: Micah Kornfield <[email protected]>
  • Loading branch information
2 people authored and Matthew Topol committed Sep 12, 2021
1 parent 9289ba8 commit d1e981a
Show file tree
Hide file tree
Showing 60 changed files with 1,466 additions and 244 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct ScalarFromArraySlotImpl {
// Fixed-size binary slot: pass the string obtained from GetString(index_) to Finish.
Status Visit(const FixedSizeBinaryArray& a) { return Finish(a.GetString(index_)); }

// Day/time interval slot: pass the element at index_ to Finish.
Status Visit(const DayTimeIntervalArray& a) { return Finish(a.Value(index_)); }
// Month/day/nano interval slot: pass the element at index_ to Finish.
Status Visit(const MonthDayNanoIntervalArray& a) { return Finish(a.Value(index_)); }

template <typename T>
Status Visit(const BaseListArray<T>& a) {
Expand Down
34 changes: 34 additions & 0 deletions cpp/src/arrow/array/array_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,44 @@ DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<DataType>& type
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}

// Builds the array with day_time_interval() as its type; all other arguments
// are forwarded unchanged to the PrimitiveArray constructor.
DayTimeIntervalArray::DayTimeIntervalArray(int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(day_time_interval(), length, data, null_bitmap, null_count, offset) {
}

// Returns a copy of the DayMilliseconds stored in logical slot `i`.
// The slot address is raw_values_ advanced by (i + data_->offset) elements of
// byte_width() bytes each. The upper bound on `i` is only checked via DCHECK.
DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const {
  using Element = DayTimeIntervalType::DayMilliseconds;
  DCHECK(i < length());
  const auto byte_position = (i + data_->offset) * byte_width();
  const uint8_t* slot = raw_values_ + byte_position;
  return *reinterpret_cast<const Element*>(slot);
}

// ----------------------------------------------------------------------
// Month, day and nanosecond interval

// Wraps existing ArrayData by handing it to SetData.
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
const std::shared_ptr<ArrayData>& data) {
SetData(data);
}

// Builds the array from an explicit type, length and buffers; every argument
// is forwarded unchanged to the PrimitiveArray constructor.
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data, const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}

// Builds the array with month_day_nano_interval() as its type; the remaining
// arguments are forwarded unchanged to the PrimitiveArray constructor.
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count, int64_t offset)
: PrimitiveArray(month_day_nano_interval(), length, data, null_bitmap, null_count,
offset) {}

// Returns a copy of the MonthDayNanos stored in logical slot `i`.
// The slot address is raw_values_ advanced by (i + data_->offset) elements of
// byte_width() bytes each. The upper bound on `i` is only checked via DCHECK.
MonthDayNanoIntervalType::MonthDayNanos MonthDayNanoIntervalArray::GetValue(
    int64_t i) const {
  using Element = MonthDayNanoIntervalType::MonthDayNanos;
  DCHECK(i < length());
  const auto byte_position = (i + data_->offset) * byte_width();
  const uint8_t* slot = raw_values_ + byte_position;
  return *reinterpret_cast<const Element*>(slot);
}

} // namespace arrow
31 changes: 31 additions & 0 deletions cpp/src/arrow/array/array_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

DayTimeIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

TypeClass::DayMilliseconds GetValue(int64_t i) const;
// Alias for GetValue(i).
TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }

Expand All @@ -132,4 +136,31 @@ class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
// Pointer into the value bytes, advanced past the first data_->offset elements
// (each byte_width() bytes wide).
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};

/// \brief Array whose elements are MonthDayNanoIntervalType::MonthDayNanos values
/// (month, day and nanosecond components).
class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray {
 public:
  using TypeClass = MonthDayNanoIntervalType;

  /// \brief Construct from already-assembled ArrayData.
  explicit MonthDayNanoIntervalArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct from an explicit type, a value buffer and an optional
  /// null bitmap.
  MonthDayNanoIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
                            const std::shared_ptr<Buffer>& data,
                            const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                            int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Construct from a value buffer and an optional null bitmap, without
  /// passing a type.
  MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
                            const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                            int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Return the element at index i.
  TypeClass::MonthDayNanos GetValue(int64_t i) const;

  /// \brief Alias for GetValue(i).
  TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); }

  // For compatibility with Take kernel.
  TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); }

  /// \brief Size in bytes of one MonthDayNanos element.
  int32_t byte_width() const {
    return static_cast<int32_t>(sizeof(TypeClass::MonthDayNanos));
  }

  /// \brief Pointer into the value bytes, advanced past the first
  /// data_->offset elements.
  const uint8_t* raw_values() const {
    const auto skipped_bytes = data_->offset * byte_width();
    return raw_values_ + skipped_bytes;
  }
};

} // namespace arrow
117 changes: 85 additions & 32 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_decimal.h"
#include "arrow/array/builder_dict.h"
#include "arrow/array/builder_time.h"
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
Expand Down Expand Up @@ -491,6 +492,7 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar)
static ScalarVector GetScalars() {
auto hello = Buffer::FromString("hello");
DayTimeIntervalType::DayMilliseconds daytime{1, 100};
MonthDayNanoIntervalType::MonthDayNanos month_day_nanos{5, 4, 100};

FieldVector union_fields{field("string", utf8()), field("number", int32()),
field("other_number", int32())};
Expand All @@ -513,6 +515,7 @@ static ScalarVector GetScalars() {
std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
std::make_shared<MonthIntervalScalar>(1),
std::make_shared<DayTimeIntervalScalar>(daytime),
std::make_shared<MonthDayNanoIntervalScalar>(month_day_nanos),
std::make_shared<DurationScalar>(60, duration(TimeUnit::SECOND)),
std::make_shared<BinaryScalar>(hello),
std::make_shared<LargeBinaryScalar>(hello),
Expand Down Expand Up @@ -811,9 +814,10 @@ TEST_F(TestBuilder, TestResizeDownsize) {
template <typename Attrs>
class TestPrimitiveBuilder : public TestBuilder {
public:
typedef Attrs TestAttrs;
typedef typename Attrs::ArrayType ArrayType;
typedef typename Attrs::BuilderType BuilderType;
typedef typename Attrs::T T;
typedef typename Attrs::T CType;
typedef typename Attrs::Type Type;

virtual void SetUp() {
Expand Down Expand Up @@ -867,7 +871,7 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_TRUE(result->Equals(*expected));
}

void FlipValue(T* ptr) {
void FlipValue(CType* ptr) {
auto byteptr = reinterpret_cast<uint8_t*>(ptr);
*byteptr = static_cast<uint8_t>(~*byteptr);
}
Expand All @@ -876,7 +880,7 @@ class TestPrimitiveBuilder : public TestBuilder {
std::unique_ptr<BuilderType> builder_;
std::unique_ptr<BuilderType> builder_nn_;

std::vector<T> draws_;
std::vector<CType> draws_;
std::vector<uint8_t> valid_bytes_;
};

Expand Down Expand Up @@ -905,16 +909,20 @@ struct UniformIntSampleType<int8_t> {
\
static std::shared_ptr<DataType> type() { return std::make_shared<Type>(); }

#define PINT_DECL(CapType, c_type) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type) \
static void draw(int64_t N, std::vector<T>* draws) { \
using sample_type = typename UniformIntSampleType<c_type>::type; \
const T lower = std::numeric_limits<T>::min(); \
const T upper = std::numeric_limits<T>::max(); \
randint(N, static_cast<sample_type>(lower), static_cast<sample_type>(upper), \
draws); \
} \
#define PINT_DECL(CapType, c_type) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type) \
static void draw(int64_t N, std::vector<T>* draws) { \
using sample_type = typename UniformIntSampleType<c_type>::type; \
const T lower = std::numeric_limits<T>::min(); \
const T upper = std::numeric_limits<T>::max(); \
randint(N, static_cast<sample_type>(lower), static_cast<sample_type>(upper), \
draws); \
} \
static T Modify(T inp) { return inp / 2; } \
typedef \
typename std::conditional<std::is_unsigned<T>::value, uint64_t, int64_t>::type \
ConversionType; \
}

#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \
Expand All @@ -923,6 +931,8 @@ struct UniformIntSampleType<int8_t> {
static void draw(int64_t N, std::vector<T>* draws) { \
random_real(N, 0, LOWER, UPPER, draws); \
} \
static T Modify(T inp) { return inp / 2; } \
typedef double ConversionType; \
}

PINT_DECL(UInt8, uint8_t);
Expand All @@ -940,6 +950,33 @@ PFLOAT_DECL(Double, double, -1000.0, 1000.0);

// Attribute bundle used to instantiate TestPrimitiveBuilder for booleans.
struct PBoolean {
  PTYPE_DECL(Boolean, uint8_t)

  // Type used by the "converted iterator" test to carry values.
  using ConversionType = int64_t;

  // Value transformation applied by the lazy-iterator test: logical negation.
  static T Modify(T inp) {
    return !inp;
  }
};

struct PDayTimeInterval {
using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
PTYPE_DECL(DayTimeInterval, DayMilliseconds);
static void draw(int64_t N, std::vector<T>* draws) { return rand_day_millis(N, draws); }

static DayMilliseconds Modify(DayMilliseconds inp) {
inp.days /= 2;
return inp;
}
typedef DayMilliseconds ConversionType;
};

struct PMonthDayNanoInterval {
using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos;
PTYPE_DECL(MonthDayNanoInterval, MonthDayNanos);
static void draw(int64_t N, std::vector<T>* draws) {
return rand_month_day_nanos(N, draws);
}
static MonthDayNanos Modify(MonthDayNanos inp) {
inp.days /= 2;
return inp;
}
typedef MonthDayNanos ConversionType;
};

template <>
Expand All @@ -952,7 +989,7 @@ void TestPrimitiveBuilder<PBoolean>::RandomData(int64_t N, double pct_null) {
}

template <>
void TestPrimitiveBuilder<PBoolean>::FlipValue(T* ptr) {
void TestPrimitiveBuilder<PBoolean>::FlipValue(CType* ptr) {
*ptr = !*ptr;
}

Expand Down Expand Up @@ -1068,7 +1105,8 @@ TEST(NumericBuilderAccessors, TestSettersGetters) {
}

typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
PInt32, PInt64, PFloat, PDouble>
PInt32, PInt64, PFloat, PDouble, PDayTimeInterval,
PMonthDayNanoInterval>
Primitives;

TYPED_TEST_SUITE(TestPrimitiveBuilder, Primitives);
Expand Down Expand Up @@ -1155,12 +1193,13 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendEmptyValue) {

// implementation detail: the value slots are 0-initialized
for (int64_t i = 0; i < result->length(); ++i) {
ASSERT_EQ(result->Value(i), 0);
typename TestFixture::CType t{};
ASSERT_EQ(result->Value(i), t);
}
}

TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 1000;

Expand Down Expand Up @@ -1190,7 +1229,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
}

TYPED_TEST(TestPrimitiveBuilder, Equality) {
DECL_T();
typedef typename TestFixture::CType T;

const int64_t size = 1000;
this->RandomData(size);
Expand Down Expand Up @@ -1226,7 +1265,7 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
}

TYPED_TEST(TestPrimitiveBuilder, SliceEquality) {
DECL_T();
typedef typename TestFixture::CType T;

const int64_t size = 1000;
this->RandomData(size);
Expand Down Expand Up @@ -1259,7 +1298,7 @@ TYPED_TEST(TestPrimitiveBuilder, SliceEquality) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
DECL_T();
typedef typename TestFixture::CType T;

const int64_t size = 10000;

Expand Down Expand Up @@ -1315,7 +1354,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);
Expand Down Expand Up @@ -1351,7 +1390,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
}

TYPED_TEST(TestPrimitiveBuilder, TestTypedFinish) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 1000;
this->RandomData(size);
Expand Down Expand Up @@ -1403,23 +1442,25 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterNullValid) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);

auto& draws = this->draws_;
auto& valid_bytes = this->valid_bytes_;

auto halve = [&draws](int64_t index) { return draws[index] / 2; };
auto halve = [&draws](int64_t index) {
return TestFixture::TestAttrs::Modify(draws[index]);
};
auto lazy_iter = internal::MakeLazyRange(halve, size);

ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(),
valid_bytes.begin()));

std::vector<T> halved;
transform(draws.begin(), draws.end(), back_inserter(halved),
[](T in) { return in / 2; });
[](T in) { return TestFixture::TestAttrs::Modify(in); });

std::shared_ptr<Array> result;
FinishAndCheckPadding(this->builder_.get(), &result);
Expand All @@ -1433,12 +1474,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
DECL_T();
typedef typename TestFixture::CType T;
// find type we can safely convert the tested values to and from
using conversion_type =
typename std::conditional<std::is_floating_point<T>::value, double,
typename std::conditional<std::is_unsigned<T>::value,
uint64_t, int64_t>::type>::type;
using conversion_type = typename TestFixture::TestAttrs::ConversionType;

int64_t size = 10000;
this->RandomData(size);
Expand Down Expand Up @@ -1474,7 +1512,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
}

TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);
Expand All @@ -1493,7 +1531,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) {

TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) {
// ARROW-1383
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);
Expand Down Expand Up @@ -3161,4 +3199,19 @@ TEST(TestSwapEndianArrayData, ExtensionType) {
AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
}

// Checks that SwapEndianArrayData byte-swaps month_day_nano_interval values
// and that applying it twice restores the original array.
TEST(TestSwapEndianArrayData, MonthDayNanoInterval) {
auto array = ArrayFromJSON(month_day_nano_interval(), R"([[0, 1, 2],
[5000, 200, 3000000000]])");
// Expected values are the inputs with the bytes of each field reversed, e.g.
// 1 -> 16777216 (0x01000000, 4-byte field) and
// 2 -> 144115188075855872 (0x0200000000000000, 8-byte field).
auto expected_array =
ArrayFromJSON(month_day_nano_interval(), R"([[0, 16777216, 144115188075855872],
[-2012020736, -939524096, 26688110733557760]])");

auto swap_array = MakeArray(*::arrow::internal::SwapEndianArrayData(array->data()));
// The swapped array must differ from the input and match the expectation.
EXPECT_TRUE(!swap_array->Equals(array));
ASSERT_ARRAYS_EQUAL(*swap_array, *expected_array);
// Round trip: swapping the swapped data yields the original array again.
ASSERT_ARRAYS_EQUAL(
*MakeArray(*::arrow::internal::SwapEndianArrayData(swap_array->data())), *array);
ASSERT_OK(swap_array->ValidateFull());
}

} // namespace arrow
Loading

0 comments on commit d1e981a

Please sign in to comment.