Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-13628: [Format][C++][Java] Add MONTH_DAY_NANO interval type #10177

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
ae3b2ed
Add MONTH_DAY_NANO interval type
emkornfield Apr 28, 2021
750f502
address some comments
emkornfield Apr 28, 2021
8e83a36
address comments
emkornfield Apr 29, 2021
d9cf7d3
Update format/Schema.fbs
emkornfield May 2, 2021
2c413a0
Update format/Schema.fbs
emkornfield May 2, 2021
93c538a
Update Schema.fbs
emkornfield May 3, 2021
0b2b4b6
Update format/Schema.fbs
emkornfield May 3, 2021
9f38756
wip
emkornfield May 24, 2021
85d82f1
Basic C++ support
emkornfield Aug 10, 2021
153d9ac
Java compiling. Unit tests still needed
emkornfield Aug 11, 2021
68ab196
add archery, fix bugs found in integration
emkornfield Aug 11, 2021
849325f
fix_lint
emkornfield Aug 11, 2021
8201e7b
fix benchmarks
emkornfield Aug 11, 2021
9a47bd9
fix type in diff test
emkornfield Aug 11, 2021
64186d7
fix lint
emkornfield Aug 11, 2021
2f54913
Update Schema.fbs
emkornfield Aug 11, 2021
51e966f
rename TestType to see if it helps MSVC
emkornfield Aug 11, 2021
7e749a1
try something else to fix msvc
emkornfield Aug 12, 2021
9a41c36
try change to typed_test_suite to see if that affects msvc
emkornfield Aug 12, 2021
22a94ef
Revert "try change to typed_test_suite to see if that affects msvc"
emkornfield Aug 13, 2021
b5fd2a3
try renaming T on class
emkornfield Aug 13, 2021
79a48ca
Add java unit tests
emkornfield Aug 14, 2021
a20f4d1
update headers
emkornfield Aug 14, 2021
10847f9
Fix lint and tests
emkornfield Aug 14, 2021
3acd0ae
address C++ comments
emkornfield Aug 17, 2021
57beacb
address java comments
emkornfield Aug 17, 2021
ed1e8ac
add random test for month_day_nano
emkornfield Aug 17, 2021
9d67971
add link macro
emkornfield Aug 17, 2021
da7457b
use pcg32
emkornfield Aug 24, 2021
9b39567
add negative values for test
emkornfield Aug 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct ScalarFromArraySlotImpl {
// Fixed-size binary: pass the string read from slot index_ to Finish().
Status Visit(const FixedSizeBinaryArray& a) { return Finish(a.GetString(index_)); }

// Day/time interval: pass the value read from slot index_ to Finish().
Status Visit(const DayTimeIntervalArray& a) { return Finish(a.Value(index_)); }
// Month/day/nanosecond interval: same handling as the day/time interval overload.
Status Visit(const MonthDayNanoIntervalArray& a) { return Finish(a.Value(index_)); }

template <typename T>
Status Visit(const BaseListArray<T>& a) {
Expand Down
34 changes: 34 additions & 0 deletions cpp/src/arrow/array/array_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,44 @@ DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<DataType>& type
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}

// Convenience constructor that does not take a DataType: the type handed to
// PrimitiveArray is always the one returned by day_time_interval(). All other
// arguments are forwarded unchanged.
DayTimeIntervalArray::DayTimeIntervalArray(int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(day_time_interval(), length, data, null_bitmap, null_count, offset) {
}

// Returns, by value, the DayMilliseconds stored in logical slot `i`.
// The upper bound of `i` is checked only through DCHECK.
DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const {
  using Slot = DayTimeIntervalType::DayMilliseconds;
  DCHECK(i < length());
  // The array's offset is added to the index before scaling by the element
  // width, so `i` is relative to this array rather than to the raw buffer.
  const auto* slot_bytes = raw_values_ + (i + data_->offset) * byte_width();
  return *reinterpret_cast<const Slot*>(slot_bytes);
}

// ----------------------------------------------------------------------
// Month, day and nanosecond interval

// Builds the array directly from existing ArrayData by handing it to SetData().
// NOTE(review): no check is made here that data->type is a month/day/nano
// interval type; presumably the caller guarantees it, so confirm.
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
const std::shared_ptr<ArrayData>& data) {
SetData(data);
}

// Constructor taking an explicit DataType; every argument is forwarded
// unchanged to the PrimitiveArray base constructor.
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data, const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}

// Convenience constructor that does not take a DataType: the type handed to
// PrimitiveArray is always the one returned by month_day_nano_interval().
// All other arguments are forwarded unchanged.
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count, int64_t offset)
: PrimitiveArray(month_day_nano_interval(), length, data, null_bitmap, null_count,
offset) {}

// Returns, by value, the MonthDayNanos stored in logical slot `i`.
// The upper bound of `i` is checked only through DCHECK.
MonthDayNanoIntervalType::MonthDayNanos MonthDayNanoIntervalArray::GetValue(
    int64_t i) const {
  using Slot = MonthDayNanoIntervalType::MonthDayNanos;
  DCHECK(i < length());
  // The array's offset is added to the index before scaling by the element
  // width, so `i` is relative to this array rather than to the raw buffer.
  const auto* slot_bytes = raw_values_ + (i + data_->offset) * byte_width();
  return *reinterpret_cast<const Slot*>(slot_bytes);
}

} // namespace arrow
31 changes: 31 additions & 0 deletions cpp/src/arrow/array/array_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

/// \brief Construct from buffers without passing a DataType
///
/// The out-of-line definition supplies day_time_interval() as the type.
/// \param[in] length number of elements
/// \param[in] data buffer holding the values
/// \param[in] null_bitmap optional validity bitmap (defaults to NULLPTR)
/// \param[in] null_count number of nulls, or kUnknownNullCount (the default)
/// \param[in] offset offset into the buffers (defaults to 0)
DayTimeIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

/// \brief Return a copy of the value stored at index i
TypeClass::DayMilliseconds GetValue(int64_t i) const;
/// \brief Same as GetValue(i)
TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }

Expand All @@ -132,4 +136,31 @@ class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
/// \brief Return raw_values_ advanced by data_->offset elements
/// (each element being byte_width() bytes wide)
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};

/// \brief Array of Month, Day and nanosecond values.
///
/// Each element is a TypeClass::MonthDayNanos struct occupying
/// sizeof(TypeClass::MonthDayNanos) bytes in the values buffer.
class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray {
public:
using TypeClass = MonthDayNanoIntervalType;

/// \brief Construct from existing ArrayData
explicit MonthDayNanoIntervalArray(const std::shared_ptr<ArrayData>& data);

/// \brief Construct from buffers with an explicitly supplied DataType
/// \param[in] type the data type passed to the PrimitiveArray base
/// \param[in] length number of elements
/// \param[in] data buffer holding the values
/// \param[in] null_bitmap optional validity bitmap (defaults to NULLPTR)
/// \param[in] null_count number of nulls, or kUnknownNullCount (the default)
/// \param[in] offset offset into the buffers (defaults to 0)
MonthDayNanoIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

/// \brief Construct from buffers without passing a DataType
///
/// The out-of-line definition supplies month_day_nano_interval() as the type.
MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

/// \brief Return a copy of the value stored at index i
TypeClass::MonthDayNanos GetValue(int64_t i) const;
/// \brief Same as GetValue(i)
TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); }

// For compatibility with Take kernel.
TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); }

/// \brief Size in bytes of one element, i.e. sizeof(TypeClass::MonthDayNanos)
int32_t byte_width() const { return sizeof(TypeClass::MonthDayNanos); }

/// \brief Return raw_values_ advanced by data_->offset elements
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};

} // namespace arrow
117 changes: 85 additions & 32 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_decimal.h"
#include "arrow/array/builder_dict.h"
#include "arrow/array/builder_time.h"
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
Expand Down Expand Up @@ -475,6 +476,7 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar)
static ScalarVector GetScalars() {
auto hello = Buffer::FromString("hello");
DayTimeIntervalType::DayMilliseconds daytime{1, 100};
MonthDayNanoIntervalType::MonthDayNanos month_day_nanos{5, 4, 100};

FieldVector union_fields{field("string", utf8()), field("number", int32()),
field("other_number", int32())};
Expand All @@ -493,6 +495,7 @@ static ScalarVector GetScalars() {
std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
std::make_shared<MonthIntervalScalar>(1),
std::make_shared<DayTimeIntervalScalar>(daytime),
std::make_shared<MonthDayNanoIntervalScalar>(month_day_nanos),
std::make_shared<DurationScalar>(60, duration(TimeUnit::SECOND)),
std::make_shared<BinaryScalar>(hello), std::make_shared<LargeBinaryScalar>(hello),
std::make_shared<FixedSizeBinaryScalar>(
Expand Down Expand Up @@ -710,9 +713,10 @@ TEST_F(TestBuilder, TestResizeDownsize) {
template <typename Attrs>
class TestPrimitiveBuilder : public TestBuilder {
public:
typedef Attrs TestAttrs;
typedef typename Attrs::ArrayType ArrayType;
typedef typename Attrs::BuilderType BuilderType;
typedef typename Attrs::T T;
typedef typename Attrs::T CType;
typedef typename Attrs::Type Type;

virtual void SetUp() {
Expand Down Expand Up @@ -766,7 +770,7 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_TRUE(result->Equals(*expected));
}

void FlipValue(T* ptr) {
void FlipValue(CType* ptr) {
auto byteptr = reinterpret_cast<uint8_t*>(ptr);
*byteptr = static_cast<uint8_t>(~*byteptr);
}
Expand All @@ -775,7 +779,7 @@ class TestPrimitiveBuilder : public TestBuilder {
std::unique_ptr<BuilderType> builder_;
std::unique_ptr<BuilderType> builder_nn_;

std::vector<T> draws_;
std::vector<CType> draws_;
std::vector<uint8_t> valid_bytes_;
};

Expand Down Expand Up @@ -804,16 +808,20 @@ struct UniformIntSampleType<int8_t> {
\
static std::shared_ptr<DataType> type() { return std::make_shared<Type>(); }

#define PINT_DECL(CapType, c_type) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type) \
static void draw(int64_t N, std::vector<T>* draws) { \
using sample_type = typename UniformIntSampleType<c_type>::type; \
const T lower = std::numeric_limits<T>::min(); \
const T upper = std::numeric_limits<T>::max(); \
randint(N, static_cast<sample_type>(lower), static_cast<sample_type>(upper), \
draws); \
} \
#define PINT_DECL(CapType, c_type) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type) \
static void draw(int64_t N, std::vector<T>* draws) { \
using sample_type = typename UniformIntSampleType<c_type>::type; \
const T lower = std::numeric_limits<T>::min(); \
const T upper = std::numeric_limits<T>::max(); \
randint(N, static_cast<sample_type>(lower), static_cast<sample_type>(upper), \
draws); \
} \
static T Modify(T inp) { return inp / 2; } \
typedef \
typename std::conditional<std::is_unsigned<T>::value, uint64_t, int64_t>::type \
ConversionType; \
}

#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \
Expand All @@ -822,6 +830,8 @@ struct UniformIntSampleType<int8_t> {
static void draw(int64_t N, std::vector<T>* draws) { \
random_real(N, 0, LOWER, UPPER, draws); \
} \
static T Modify(T inp) { return inp / 2; } \
typedef double ConversionType; \
}

PINT_DECL(UInt8, uint8_t);
Expand All @@ -839,6 +849,33 @@ PFLOAT_DECL(Double, double, -1000.0, 1000.0);

// Test attributes for the boolean builder, with uint8_t as the C type handed
// to PTYPE_DECL.
struct PBoolean {
  PTYPE_DECL(Boolean, uint8_t)

  // Intermediate type read by TestAppendValuesIterConverted.
  using ConversionType = int64_t;

  // Element transformation used by TestAppendValuesLazyIter: logical negation.
  static T Modify(T inp) {
    const bool negated = !inp;
    return negated;
  }
};

// Test attributes for the day/time interval builder.
struct PDayTimeInterval {
  using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
  PTYPE_DECL(DayTimeInterval, DayMilliseconds);

  // Intermediate type read by TestAppendValuesIterConverted; the value type
  // itself is used, so no conversion takes place.
  using ConversionType = DayMilliseconds;

  // Fills `draws` with N values produced by rand_day_millis().
  static void draw(int64_t N, std::vector<T>* draws) { rand_day_millis(N, draws); }

  // Element transformation used by TestAppendValuesLazyIter: halves the `days`
  // field and leaves the rest of the value untouched.
  static DayMilliseconds Modify(DayMilliseconds inp) {
    inp.days = inp.days / 2;
    return inp;
  }
};

// Test attributes for the month/day/nanosecond interval builder.
struct PMonthDayNanoInterval {
  using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos;
  PTYPE_DECL(MonthDayNanoInterval, MonthDayNanos);

  // Intermediate type read by TestAppendValuesIterConverted; the value type
  // itself is used, so no conversion takes place.
  using ConversionType = MonthDayNanos;

  // Fills `draws` with N values produced by rand_month_day_nanos().
  static void draw(int64_t N, std::vector<T>* draws) { rand_month_day_nanos(N, draws); }

  // Element transformation used by TestAppendValuesLazyIter: halves the `days`
  // field and leaves the rest of the value untouched.
  static MonthDayNanos Modify(MonthDayNanos inp) {
    inp.days = inp.days / 2;
    return inp;
  }
};

template <>
Expand All @@ -851,7 +888,7 @@ void TestPrimitiveBuilder<PBoolean>::RandomData(int64_t N, double pct_null) {
}

template <>
void TestPrimitiveBuilder<PBoolean>::FlipValue(T* ptr) {
void TestPrimitiveBuilder<PBoolean>::FlipValue(CType* ptr) {
*ptr = !*ptr;
}

Expand Down Expand Up @@ -967,7 +1004,8 @@ TEST(NumericBuilderAccessors, TestSettersGetters) {
}

typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
PInt32, PInt64, PFloat, PDouble>
PInt32, PInt64, PFloat, PDouble, PDayTimeInterval,
PMonthDayNanoInterval>
Primitives;

TYPED_TEST_SUITE(TestPrimitiveBuilder, Primitives);
Expand Down Expand Up @@ -1054,12 +1092,13 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendEmptyValue) {

// implementation detail: the value slots are 0-initialized
for (int64_t i = 0; i < result->length(); ++i) {
ASSERT_EQ(result->Value(i), 0);
typename TestFixture::CType t{};
ASSERT_EQ(result->Value(i), t);
}
}

TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 1000;

Expand Down Expand Up @@ -1089,7 +1128,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
}

TYPED_TEST(TestPrimitiveBuilder, Equality) {
DECL_T();
typedef typename TestFixture::CType T;

const int64_t size = 1000;
this->RandomData(size);
Expand Down Expand Up @@ -1125,7 +1164,7 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
}

TYPED_TEST(TestPrimitiveBuilder, SliceEquality) {
DECL_T();
typedef typename TestFixture::CType T;

const int64_t size = 1000;
this->RandomData(size);
Expand Down Expand Up @@ -1158,7 +1197,7 @@ TYPED_TEST(TestPrimitiveBuilder, SliceEquality) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
DECL_T();
typedef typename TestFixture::CType T;

const int64_t size = 10000;

Expand Down Expand Up @@ -1214,7 +1253,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);
Expand Down Expand Up @@ -1250,7 +1289,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
}

TYPED_TEST(TestPrimitiveBuilder, TestTypedFinish) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 1000;
this->RandomData(size);
Expand Down Expand Up @@ -1302,23 +1341,25 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterNullValid) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);

auto& draws = this->draws_;
auto& valid_bytes = this->valid_bytes_;

auto halve = [&draws](int64_t index) { return draws[index] / 2; };
auto halve = [&draws](int64_t index) {
return TestFixture::TestAttrs::Modify(draws[index]);
};
auto lazy_iter = internal::MakeLazyRange(halve, size);

ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(),
valid_bytes.begin()));

std::vector<T> halved;
transform(draws.begin(), draws.end(), back_inserter(halved),
[](T in) { return in / 2; });
[](T in) { return TestFixture::TestAttrs::Modify(in); });

std::shared_ptr<Array> result;
FinishAndCheckPadding(this->builder_.get(), &result);
Expand All @@ -1332,12 +1373,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) {
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
DECL_T();
typedef typename TestFixture::CType T;
// find type we can safely convert the tested values to and from
using conversion_type =
typename std::conditional<std::is_floating_point<T>::value, double,
typename std::conditional<std::is_unsigned<T>::value,
uint64_t, int64_t>::type>::type;
using conversion_type = typename TestFixture::TestAttrs::ConversionType;

int64_t size = 10000;
this->RandomData(size);
Expand Down Expand Up @@ -1373,7 +1411,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
}

TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) {
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);
Expand All @@ -1392,7 +1430,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) {

TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) {
// ARROW-1383
DECL_T();
typedef typename TestFixture::CType T;

int64_t size = 10000;
this->RandomData(size);
Expand Down Expand Up @@ -3060,4 +3098,19 @@ TEST(TestSwapEndianArrayData, ExtensionType) {
AssertArrayDataEqualsWithSwapEndian(test_data, expected_data);
}

// Endian-swapping a month_day_nano_interval array must produce the
// per-field byte-reversed values listed below, and swapping a second time
// must restore the original array.
TEST(TestSwapEndianArrayData, MonthDayNanoInterval) {
  const char* native_json = R"([[0, 1, 2],
[5000, 200, 3000000000]])";
  const char* swapped_json = R"([[0, 16777216, 144115188075855872],
[-2012020736, -939524096, 26688110733557760]])";

  auto array = ArrayFromJSON(month_day_nano_interval(), native_json);
  auto expected_array = ArrayFromJSON(month_day_nano_interval(), swapped_json);

  // One swap: differs from the input and matches the expected values.
  auto swap_array = MakeArray(*::arrow::internal::SwapEndianArrayData(array->data()));
  EXPECT_TRUE(!swap_array->Equals(array));
  ASSERT_ARRAYS_EQUAL(*swap_array, *expected_array);

  // Two swaps: back to the input.
  ASSERT_ARRAYS_EQUAL(
      *MakeArray(*::arrow::internal::SwapEndianArrayData(swap_array->data())), *array);

  ASSERT_OK(swap_array->ValidateFull());
}
pitrou marked this conversation as resolved.
Show resolved Hide resolved

} // namespace arrow
Loading