From 4dfe438894b780c74d0070254239ec6f63688941 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 9 Feb 2021 21:04:15 -0800 Subject: [PATCH] ARROW-11559: [C++] Use smarter Flatbuffers verification parameters Flatbuffers is able to encode a virtually unbounded of schema fields in a small buffer size. Verifying that many fields with the Flatbuffers verifier seems to result in potentially unbounded verification times, which is a denial of service risk. To mitigate the risk, impose that a Flatbuffers buffer cannot represent one more than one Flatbuffers table per buffer bit, which should always be true for well-formed Arrow IPC metadata. Indeed, the only recursive table, the `Field` table in Schema.fbs, mandates the presence of its `type` member (though it's not marked as required in the Flatbuffers definition, it's validated by the IPC read routines). TODO: * [ ] Add OSS-Fuzz regression file Closes #9447 from pitrou/ARROW-11559-fbb-verification-params Authored-by: Antoine Pitrou Signed-off-by: Micah Kornfield --- cpp/src/arrow/ipc/metadata_internal.h | 16 ++++++++++++++-- cpp/src/arrow/ipc/reader.cc | 8 +++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/ipc/metadata_internal.h b/cpp/src/arrow/ipc/metadata_internal.h index d5c697fe57b07..9cf489dd66884 100644 --- a/cpp/src/arrow/ipc/metadata_internal.h +++ b/cpp/src/arrow/ipc/metadata_internal.h @@ -156,10 +156,22 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr Status GetKeyValueMetadata(const KVVector* fb_metadata, std::shared_ptr* out); +template +bool VerifyFlatbuffers(const uint8_t* data, int64_t size) { + // Heuristic: tables in a Arrow flatbuffers buffer must take at least 1 bit + // each in average (ARROW-11559). + // Especially, the only recursive table (the `Field` table in Schema.fbs) + // must have a non-empty `type` member. + flatbuffers::Verifier verifier( + data, static_cast(size), + /*max_depth=*/128, + /*max_tables=*/static_cast(8 * size)); + return verifier.VerifyBuffer(nullptr); +} + static inline Status VerifyMessage(const uint8_t* data, int64_t size, const flatbuf::Message** out) { - flatbuffers::Verifier verifier(data, size, /*max_depth=*/128); - if (!flatbuf::VerifyMessageBuffer(verifier)) { + if (!VerifyFlatbuffers(data, size)) { return Status::IOError("Invalid flatbuffers message."); } *out = flatbuf::GetMessage(data); diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 57bf57a5e17f0..82fb4c743a435 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1052,11 +1052,9 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { footer_buffer_, file_->ReadAt(footer_offset_ - footer_length - file_end_size, footer_length)); - auto data = footer_buffer_->data(); - flatbuffers::Verifier verifier(data, footer_buffer_->size(), /*max_depth=*/128, - /*max_tables=*/UINT_MAX); - - if (!flatbuf::VerifyFooterBuffer(verifier)) { + const auto data = footer_buffer_->data(); + const auto size = footer_buffer_->size(); + if (!internal::VerifyFlatbuffers(data, size)) { return Status::IOError("Verification of flatbuffer-encoded Footer failed."); } footer_ = flatbuf::GetFooter(data);