Skip to content

Commit

Permalink
[Enhancement] Merge orc type checking logic in OrcMapping (StarRocks#…
Browse files Browse the repository at this point in the history
…36127)

Signed-off-by: Smith Cruise <[email protected]>
  • Loading branch information
Smith-Cruise authored and lipenglin committed Dec 2, 2023
1 parent da73af4 commit fdaab7e
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 23 deletions.
39 changes: 16 additions & 23 deletions be/src/formats/orc/orc_mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ Status OrcMapping::set_include_column_id_by_type(const OrcMappingPtr& mapping, c
}
}
} else {
DCHECK(false) << "Unreachable!";
return Status::InternalError("Unreachable");
}
return Status::OK();
}
Expand Down Expand Up @@ -125,19 +125,29 @@ Status OrcMappingFactory::_check_orc_type_can_convert_2_logical_type(const orc::
const TypeDescriptor& slot_target_type,
const OrcMappingOptions& options) {
bool can_convert = true;
// check orc type -> slot type desc
if (orc_source_type->getKind() == orc::TypeKind::LIST) {
can_convert = slot_target_type.is_array_type();
can_convert &= slot_target_type.is_array_type();
} else if (orc_source_type->getKind() == orc::TypeKind::MAP) {
can_convert = slot_target_type.is_map_type();
can_convert &= slot_target_type.is_map_type();
} else if (orc_source_type->getKind() == orc::TypeKind::STRUCT) {
can_convert = slot_target_type.is_struct_type();
can_convert &= slot_target_type.is_struct_type();
}

// check slot type desc -> orc type
if (slot_target_type.is_array_type()) {
can_convert &= orc_source_type->getKind() == orc::TypeKind::LIST;
} else if (slot_target_type.is_map_type()) {
can_convert &= orc_source_type->getKind() == orc::TypeKind::MAP;
} else if (slot_target_type.is_struct_type()) {
can_convert &= orc_source_type->getKind() == orc::TypeKind::STRUCT;
}

//TODO Other logical type not check now!

if (!can_convert) {
return Status::NotSupported(
strings::Substitute("Not support to convert orc's type from $0 to $1, filename = $2",
strings::Substitute("Orc's type $0 and Slot's type $1 can't convert to each other, filename = $2",
orc_source_type->toString(), slot_target_type.debug_string(), options.filename));
}
return Status::OK();
Expand Down Expand Up @@ -242,12 +252,6 @@ Status OrcMappingFactory::_set_child_mapping(const OrcMappingPtr& mapping, const
const orc::Type* orc_type, const OrcMappingOptions& options) {
DCHECK(origin_type.is_complex_type());
if (origin_type.type == LogicalType::TYPE_STRUCT) {
if (orc_type->getKind() != orc::TypeKind::STRUCT) {
return Status::InternalError(strings::Substitute(
"Orc nest type check error: expect type in this layer is STRUCT, actual type is $0, ",
orc_type->toString()));
}

std::unordered_map<std::string, size_t> tmp_orc_fieldname_2_pos;
for (size_t i = 0; i < orc_type->getSubtypeCount(); i++) {
std::string field_name = Utils::format_name(orc_type->getFieldName(i), options.case_sensitive);
Expand Down Expand Up @@ -277,11 +281,6 @@ Status OrcMappingFactory::_set_child_mapping(const OrcMappingPtr& mapping, const
mapping->add_mapping(index, need_add_child_mapping, orc_child_type);
}
} else if (origin_type.type == LogicalType::TYPE_ARRAY) {
if (orc_type->getKind() != orc::TypeKind::LIST) {
return Status::InternalError(strings::Substitute(
"Orc nest type check error: expect type in this layer is LIST, actual type is $0, ",
orc_type->toString()));
}
const orc::Type* orc_child_type = orc_type->getSubtype(0);
const TypeDescriptor& origin_child_type = origin_type.children[0];
// Check Array's element can be converted
Expand All @@ -296,12 +295,6 @@ Status OrcMappingFactory::_set_child_mapping(const OrcMappingPtr& mapping, const

mapping->add_mapping(0, need_add_child_mapping, orc_child_type);
} else if (origin_type.type == LogicalType::TYPE_MAP) {
if (orc_type->getKind() != orc::TypeKind::MAP) {
return Status::InternalError(strings::Substitute(
"Orc nest type check error: expect type in this layer is MAP, actual type is $0, ",
orc_type->toString()));
}

// Check Map's key can be converted
if (!origin_type.children[0].is_unknown_type()) {
RETURN_IF_ERROR(_check_orc_type_can_convert_2_logical_type(orc_type->getSubtype(0), origin_type.children[0],
Expand Down Expand Up @@ -330,7 +323,7 @@ Status OrcMappingFactory::_set_child_mapping(const OrcMappingPtr& mapping, const
}
mapping->add_mapping(1, need_add_value_child_mapping, orc_type->getSubtype(1));
} else {
DCHECK(false) << "Unreachable";
return Status::InternalError("Unreachable");
}
return Status::OK();
}
Expand Down
22 changes: 22 additions & 0 deletions be/test/formats/orc/orc_chunk_reader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2135,6 +2135,7 @@ TEST_F(OrcChunkReaderTest, TestOrcIcebergPositionDelete) {
}

TEST_F(OrcChunkReaderTest, TestTypeMismatched) {
// Type: struct<col_string:string,col_map:map<int,decimal(9,9)>>
static const std::string input_orc_file = "./be/test/exec/test_data/orc_scanner/map_type_mismatched.orc";

SlotDesc c0{"col_string", TypeDescriptor::from_logical_type(LogicalType::TYPE_VARCHAR)};
Expand Down Expand Up @@ -2171,6 +2172,27 @@ TEST_F(OrcChunkReaderTest, TestTypeMismatched) {
EXPECT_EQ("['1', {-2147483648:'0.999999999'}]", result->debug_row(0));
}

TEST_F(OrcChunkReaderTest, TestTypeMismatchedArray2String) {
// Type: struct<col_string:string,col_map:map<int,decimal(9,9)>>
static const std::string input_orc_file = "./be/test/exec/test_data/orc_scanner/map_type_mismatched.orc";

SlotDesc c0{"col_string", TypeDescriptor::from_logical_type(LogicalType::TYPE_ARRAY)};
c0.type.children.push_back(TypeDescriptor::from_logical_type(LogicalType::TYPE_INT));

SlotDesc slot_descs[] = {c0, {""}};

std::vector<SlotDescriptor*> src_slot_descriptors;
ObjectPool pool;
create_slot_descriptors(_runtime_state.get(), &pool, &src_slot_descriptors, slot_descs);

OrcChunkReader reader(_runtime_state->chunk_size(), src_slot_descriptors);
reader.set_use_orc_column_names(true);
reader.set_case_sensitive(true);
auto input_stream = orc::readLocalFile(input_orc_file);
Status st = reader.init(std::move(input_stream));
EXPECT_FALSE(st.ok());
}

TEST_F(OrcChunkReaderTest, TestTypeMismatchedString2Double) {
static const std::string input_orc_file = "./be/test/exec/test_data/orc_scanner/string-2-double.orc";

Expand Down

0 comments on commit fdaab7e

Please sign in to comment.