Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added fuzzing support #3007

Merged
merged 14 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gumbo-parser/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
build
googletest
src/*.o
fuzzer/build
src/libgumbo.a
15 changes: 15 additions & 0 deletions gumbo-parser/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@ LDFLAGS := -pthread

# `all` has no recipe on this line; requesting it simply brings `check` up to date.
all: check

# Fuzzer builds. Every target below is a command, not a file, so declare them
# phony: a stray file with one of these names must not suppress the build.
.PHONY: fuzzing fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan

# Build every fuzzer variant, strictly one after another.
# fuzzer/build.sh runs `make clean && make` inside ../src (relative to fuzzer/)
# with different compiler flags per variant, so two variants building at the
# same time would trample each other's objects. Listing the variants as
# prerequisites would let `make -j` run them concurrently; separate sub-make
# invocations in a single recipe keep them sequential regardless of -j.
fuzzing:
	$(MAKE) fuzzer-normal
	$(MAKE) fuzzer-asan
	$(MAKE) fuzzer-ubsan
	$(MAKE) fuzzer-msan

# Plain libFuzzer build (SANITIZER left unset).
# No trailing `cd -`: each recipe line runs in its own shell, so there is no
# working directory to restore afterwards.
fuzzer-normal:
	cd fuzzer && ./build.sh

# libFuzzer + AddressSanitizer.
fuzzer-asan:
	cd fuzzer && SANITIZER=address ./build.sh

# libFuzzer + UndefinedBehaviorSanitizer.
fuzzer-ubsan:
	cd fuzzer && SANITIZER=undefined ./build.sh

# libFuzzer + MemorySanitizer.
fuzzer-msan:
	cd fuzzer && SANITIZER=memory ./build.sh

# Don't try to regenerate ragel or gperf files in CI: regeneration is a development-only
# action, and the generated files should be committed to SCM.
ifneq ($(CI),true)
Expand Down Expand Up @@ -81,6 +95,7 @@ coverage:

# Remove the build output directories: `build` and the fuzzer's `fuzzer/build`.
# $(RM) defaults to `rm -f` in GNU Make, so a directory that does not exist is
# not treated as an error.
clean:
$(RM) -r build
$(RM) -r fuzzer/build

build/src/flags: | build/src
@echo 'old_CC := $(CC)' > $@
Expand Down
46 changes: 46 additions & 0 deletions gumbo-parser/fuzzer/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/sh
# Builds the gumbo parse fuzzer with clang and libFuzzer.
#
# Environment:
#   LLVM_CONFIG  required; path to an llvm-config binary (validated below).
#   SANITIZER    optional; compared further down against "address",
#                "undefined" and "memory".

# Start with no sanitizer flags and no sanitizer runtime; both are only
# overwritten later when $SANITIZER matches a supported value.
export SANITIZER_OPTS=""
export SANITIZER_LINK=""

if [ -z "${LLVM_CONFIG}" ]
then
    # Report on stderr and exit non-zero. A bare `exit` would return the
    # status of the preceding echo (0), so the calling make recipe would treat
    # a build that never ran as a success.
    echo '$LLVM_CONFIG has not been configured, expecting "export LLVM_CONFIG=/usr/bin/llvm-config-12" assuming clang-12 is installed, however any clang version works' >&2
    exit 1
fi
fuzzy-boiii23a marked this conversation as resolved.
Show resolved Hide resolved

# Output directory for the fuzzer binaries; -p makes this a no-op when the
# directory already exists.
mkdir -p build

# Take the compilers from the LLVM installation the user selected through
# $LLVM_CONFIG. Hard-coding `llvm-config-12` here would break on any machine
# with a different clang version, even though $LLVM_CONFIG is required and the
# rest of the script already uses it to locate the runtime libraries.
export CC="$("$LLVM_CONFIG" --bindir)/clang"
export CXX="$("$LLVM_CONFIG" --bindir)/clang++"
fuzzy-boiii23a marked this conversation as resolved.
Show resolved Hide resolved
# Directory in which the selected LLVM installation keeps its libraries; the
# compiler-rt archives are searched for beneath it.
LLVM_LIBDIR="$("$LLVM_CONFIG" --libdir)"

# Print the path of the first file called $1 found under $LLVM_LIBDIR
# (prints nothing when there is no match).
find_runtime() {
    find "$LLVM_LIBDIR" -name "$1" | head -1
}

# Compile with -fsanitize=fuzzer-no-link and add the libFuzzer runtime archive
# explicitly on the final link line instead.
export CXXFLAGS="-fsanitize=fuzzer-no-link"
export CFLAGS="-fsanitize=fuzzer-no-link"
export ENGINE_LINK="$(find_runtime libclang_rt.fuzzer-x86_64.a)"

# Pick the sanitizer compile flags and the matching runtime archive.
case "$SANITIZER" in
    "")
        # Plain fuzzer build: keep the empty defaults.
        ;;
    undefined)
        export SANITIZER_OPTS="-fsanitize=undefined"
        export SANITIZER_LINK="$(find_runtime libclang_rt.ubsan_standalone_cxx-x86_64.a)"
        ;;
    address)
        export SANITIZER_OPTS="-fsanitize=address"
        export SANITIZER_LINK="$(find_runtime libclang_rt.asan_cxx-x86_64.a)"
        ;;
    memory)
        export SANITIZER_OPTS="-fsanitize=memory -fPIE -pie -Wno-unused-command-line-argument"
        export SANITIZER_LINK="$(find_runtime libclang_rt.msan_cxx-x86_64.a)"
        ;;
    *)
        # Refuse unknown values: carrying on would produce an uninstrumented
        # binary whose file name nevertheless claims the requested sanitizer.
        echo "unsupported SANITIZER value: $SANITIZER (expected address, undefined or memory)" >&2
        exit 1
        ;;
esac

export CXXFLAGS="-O3 $CXXFLAGS $SANITIZER_OPTS"
export CFLAGS="-O3 $CFLAGS $SANITIZER_OPTS"

# Rebuild the library in ../src from scratch with the flags exported above
# (presumably this is what produces ../src/libgumbo.a, which is linked below).
# The subshell keeps this script's working directory unchanged whether or not
# the build succeeds, and a failed build aborts the script instead of linking
# against a stale or missing archive.
( cd ../src && make clean && make ) || exit 1

# The binary name carries the sanitizer as a suffix so variants can coexist.
if [ -z "${SANITIZER}" ]
then
    OUTPUT="build/parse_fuzzer"
else
    OUTPUT="build/parse_fuzzer-$SANITIZER"
fi

# $CXXFLAGS, $ENGINE_LINK and $SANITIZER_LINK are deliberately unquoted:
# the flags must be split into separate words, and an empty archive variable
# must vanish rather than become an empty argument.
"$CXX" $CXXFLAGS -o "$OUTPUT" parse_fuzzer.cc ../src/libgumbo.a $ENGINE_LINK $SANITIZER_LINK
68 changes: 68 additions & 0 deletions gumbo-parser/fuzzer/parse_fuzzer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#include "../src/nokogiri_gumbo.h"
#include <stdint.h>

// Recursively walks the parse tree rooted at `node` and verifies that every
// attribute pointer and child pointer stored in an element's vectors is
// non-null.
//
// `input` and `input_length` are not inspected; they are only forwarded to the
// recursive calls. `depth` is the nesting level of `node` (the caller passes 0
// for the root).
//
// Returns 0 when the subtree passes. Returns -1 when `node` is null, is a
// document node, sits deeper than 400 levels, or when any attribute or child
// pointer anywhere in the subtree is null.
int SanityCheckPointers(const char* input, size_t input_length, const GumboNode* node, int depth) {
  if (!node || node->type == GUMBO_NODE_DOCUMENT || depth > 400) {
    return -1;
  }

  if (node->type != GUMBO_NODE_ELEMENT) {
    // Only GUMBO_NODE_ELEMENT nodes are inspected; every other node type is
    // accepted as-is. (A null test on &node->v.text would be dead code: the
    // address of a member of a valid object is never null.)
    return 0;
  }

  const GumboElement* element = &node->v.element;

  const GumboVector* attributes = &element->attributes;
  for (unsigned int i = 0; i < attributes->length; ++i) {
    const GumboAttribute* attribute = static_cast<const GumboAttribute*>(attributes->data[i]);
    if (!attribute) {
      return -1;
    }
  }

  const GumboVector* children = &element->children;
  for (unsigned int i = 0; i < children->length; ++i) {
    const GumboNode* child = static_cast<const GumboNode*>(children->data[i]);
    if (!child) {
      return -1;
    }
    // Propagate a failure found deeper in the tree; discarding the recursive
    // result would make every check below the root level ineffective.
    if (SanityCheckPointers(input, input_length, child, depth + 1) < 0) {
      return -1;
    }
  }

  return 0;
}

// libFuzzer entry point. Parses `data` (`size` bytes) with gumbo's default
// options and runs SanityCheckPointers over the resulting tree.
//
// Inputs shorter than 10 bytes are skipped. Returns -1 when the sanity check
// reports a failure and 0 otherwise.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  if (size < 10) {
    return 0;
  }

  GumboOptions options = kGumboDefaultOptions;
  const char* html = reinterpret_cast<const char*>(data);

  GumboOutput* output = gumbo_parse_with_options(&options, html, size);
  if (!output) {
    // Nothing was produced, so there is nothing to check or to free. Test
    // this before touching output->root rather than after dereferencing it.
    return 0;
  }

  const int result = SanityCheckPointers(html, size, output->root, 0);

  // Single cleanup path: the parse output is released on success and failure.
  gumbo_destroy_output(output);

  return result < 0 ? -1 : 0;
}