diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index f1f0ba439dac..e0bf26470dfd 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -24,7 +24,7 @@ Describe changes:
### Provide values to any of the below to override the defaults.
-- To use an LibHTP, Suricata-Verify or Suricata-Update pull request,
+- To use a Suricata-Verify or Suricata-Update pull request,
link to the pull request in the respective `_BRANCH` variable.
- Leave unused overrides blank or remove.
@@ -32,5 +32,3 @@ SV_REPO=
SV_BRANCH=
SU_REPO=
SU_BRANCH=
-LIBHTP_REPO=
-LIBHTP_BRANCH=
diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml
index a23adfc901ae..531815928fa0 100644
--- a/.github/workflows/builds.yml
+++ b/.github/workflows/builds.yml
@@ -9,8 +9,6 @@ on:
pull_request:
workflow_dispatch:
inputs:
- LIBHTP_REPO:
- LIBHTP_BRANCH:
SU_REPO:
SU_BRANCH:
SV_REPO:
@@ -148,7 +146,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xvf prep/libhtp.tar.gz
- run: tar xvf prep/suricata-update.tar.gz
- run: tar xvf prep/suricata-verify.tar.gz
- name: Configuring
@@ -296,7 +293,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xvf prep/libhtp.tar.gz
- run: tar xvf prep/suricata-update.tar.gz
- run: tar xvf prep/suricata-verify.tar.gz
- name: Build
@@ -314,81 +310,6 @@ jobs:
run: cargo clippy --all-features
working-directory: rust
- almalinux-9-non-bundled-libhtp:
- name: AlmaLinux 9 Non-Bundled LibHTP
- runs-on: ubuntu-latest
- container: almalinux:9
- needs: [prepare-deps, ubuntu-22-04-dist]
- steps:
- # Cache Rust stuff.
- - name: Cache cargo registry
- uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2
- with:
- path: ~/.cargo/registry
- key: cargo-registry
-
- - name: Cache RPMs
- uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2
- with:
- path: /var/cache/dnf
- key: ${{ github.job }}-dnf
- - run: echo "keepcache=1" >> /etc/dnf/dnf.conf
-
- - name: Determine number of CPUs
- run: echo CPUS=$(nproc --all) >> $GITHUB_ENV
-
- - name: Install system packages
- run: |
- dnf -y install dnf-plugins-core epel-release
- dnf config-manager --set-enabled crb
- dnf -y install \
- autoconf \
- automake \
- cargo-vendor \
- cbindgen \
- diffutils \
- numactl-devel \
- dpdk-devel \
- file-devel \
- gcc \
- gcc-c++ \
- git \
- jansson-devel \
- jq \
- libtool \
- libyaml-devel \
- libnfnetlink-devel \
- libnetfilter_queue-devel \
- libnet-devel \
- libcap-ng-devel \
- libevent-devel \
- libmaxminddb-devel \
- libpcap-devel \
- libtool \
- lz4-devel \
- make \
- pcre2-devel \
- pkgconfig \
- python3-devel \
- python3-sphinx \
- python3-yaml \
- rust-toolset \
- sudo \
- which \
- zlib-devel
-
- - name: Download suricata.tar.gz
- uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16
- with:
- name: dist
-
- - run: tar xf suricata-*.tar.gz --strip-components=1
- - run: cd libhtp && ./configure --prefix=/usr/local
- - run: cd libhtp && make -j ${{ env.CPUS }}
- - run: cd libhtp && make install
-
- - run: PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./configure --enable-non-bundled-htp --with-libhtp-includes=/usr/local/include --with-libhtp-libraries=/usr/local/lib
-
rpms:
name: Build RPMs
runs-on: ubuntu-latest
@@ -547,7 +468,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xvf prep/libhtp.tar.gz
- run: tar xvf prep/suricata-update.tar.gz
- run: tar xvf prep/suricata-verify.tar.gz
- uses: ./.github/actions/install-cbindgen
@@ -727,7 +647,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: ./autogen.sh
- run: ./configure --enable-warnings --disable-shared
@@ -823,7 +742,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: ./autogen.sh
- run: CC="clang" CFLAGS="$DEFAULT_CFLAGS -Wshadow" ./configure --disable-shared
@@ -917,7 +835,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: ./autogen.sh
- run: ./configure --enable-warnings --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue
@@ -1017,7 +934,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: ./autogen.sh
- run: CC="clang" CFLAGS="$DEFAULT_CFLAGS -Wshadow -fsanitize=address -fno-omit-frame-pointer" ./configure --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue
@@ -1108,7 +1024,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: ./autogen.sh
- run: ./configure --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue
@@ -1194,7 +1109,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- run: mkdir /home/suricata/suricata
@@ -1286,7 +1200,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: ./autogen.sh
- run: |
if ./configure; then
@@ -1348,7 +1261,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: ./autogen.sh
- run: CFLAGS="${DEFAULT_CFLAGS}" ./configure
- run: make -j ${{ env.CPUS }}
@@ -1416,7 +1328,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- run: ./autogen.sh
@@ -1493,7 +1404,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
- run: ./configure --enable-warnings --disable-shared --enable-unittests
@@ -1549,14 +1459,6 @@ jobs:
CARGO_INCREMENTAL: 0
- run: llvm-profdata-14 merge -o ct.profdata /tmp/ct.profraw
- run: llvm-cov-14 show $(find rust/target/debug/deps/ -type f -regex 'rust/target/debug/deps/suricata\-[a-z0-9]+$') -instr-profile=ct.profdata --show-instantiations --ignore-filename-regex="^/root/.*" >> coverage.txt
- - run: |
- cd libhtp
- make test
- cd ..
- env:
- LLVM_PROFILE_FILE: "/tmp/htp-test.profraw"
- - run: llvm-profdata-14 merge -o htp-test.profdata /tmp/htp-test.profraw
- - run: llvm-cov-14 show libhtp/test/test_all -instr-profile=htp-test.profdata --show-instantiations --ignore-filename-regex="^/root/.*" >> coverage.txt
- name: Upload coverage to Codecov
uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238
with:
@@ -1634,7 +1536,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- uses: ./.github/actions/install-cbindgen
- name: Fix kernel mmap rnd bits
@@ -1740,7 +1641,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- name: Fix kernel mmap rnd bits
# Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
@@ -1875,7 +1775,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- name: Extracting suricata-verify
run: tar xf prep/suricata-verify.tar.gz
- name: Fix kernel mmap rnd bits
@@ -1963,7 +1862,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- name: Fix kernel mmap rnd bits
run: sudo sysctl vm.mmap_rnd_bits=28
- run: ./autogen.sh
@@ -2066,7 +1964,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- name: Fix kernel mmap rnd bits
# Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
@@ -2157,7 +2054,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
@@ -2307,7 +2203,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- name: Fix kernel mmap rnd bits
# Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
@@ -2385,7 +2280,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
- run: AFL_HARDEN=1 ac_cv_func_realloc_0_nonnull=yes ac_cv_func_malloc_0_nonnull=yes CFLAGS="-fsanitize=address -fno-omit-frame-pointer" CXXFLAGS=$CFLAGS CC=afl-clang-fast CXX=afl-clang-fast++ LDFLAGS="-fsanitize=address" ./configure --enable-warnings --enable-fuzztargets --disable-shared
@@ -2485,7 +2379,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
- run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-netmap
@@ -2520,7 +2413,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- run: ./autogen.sh
@@ -2621,7 +2513,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
- run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-dpdk
@@ -2702,7 +2593,6 @@ jobs:
run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain $(grep rust-version rust/Cargo.toml.in|sed 's/\"//g'|awk '{print $3}') -y
- run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- uses: ./.github/actions/install-cbindgen
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- run: ./autogen.sh
@@ -2800,7 +2690,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
@@ -2886,7 +2775,6 @@ jobs:
run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain $(grep rust-version rust/Cargo.toml.in|sed 's/\"//g'|awk '{print $3}') -y
- run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- uses: ./.github/actions/install-cbindgen
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- run: ./autogen.sh
@@ -2962,7 +2850,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
@@ -3033,7 +2920,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- uses: ./.github/actions/install-cbindgen
- run: ./autogen.sh
@@ -3084,7 +2970,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xvf prep/libhtp.tar.gz
- run: tar xvf prep/suricata-update.tar.gz
- name: Create Python virtual environment
run: python3 -m venv ./testenv
@@ -3093,10 +2978,14 @@ jobs:
. ./testenv/bin/activate
pip install pyyaml
- run: ./autogen.sh
- - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-unittests --prefix="$HOME/.local/"
- - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" make -j2
- # somehow it gets included by some C++ stdlib header (case unsensitive)
- - run: rm libhtp/VERSION && make check
+      # Export in the shell: step-level `env:` values are not shell-expanded and do not carry over to other steps.
+      - run: |
+          export CPATH="$HOMEBREW_PREFIX/include:$CPATH"
+          export LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH"
+          export PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH"
+          export CFLAGS="${DEFAULT_CFLAGS}"
+          ./configure --enable-warnings --enable-unittests --prefix="$HOME/.local/"
+          make -j2 && make check
- run: tar xf prep/suricata-verify.tar.gz
- name: Running suricata-verify
run: |
@@ -3138,7 +3027,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- name: Npcap DLL
run: |
@@ -3194,7 +3082,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: tar xf prep/suricata-verify.tar.gz
- name: Build
@@ -3238,7 +3125,6 @@ jobs:
with:
name: prep
path: prep
- - run: tar xf prep/libhtp.tar.gz
- name: WinDivert
run: |
curl -sL -O https://github.com/basil00/Divert/releases/download/v1.4.3/WinDivert-1.4.3-A.zip
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 9717f8762d0a..b518308dfad6 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -54,7 +54,6 @@ jobs:
sudo apt-get install libjansson-dev
sudo apt-get install libpcap-dev
sudo apt-get install libnuma-dev
- git clone --depth 1 https://github.com/OISF/libhtp.git
cargo install cbindgen
export PATH=/opt/work/.cargo/bin:$PATH
chmod +x autogen.sh
diff --git a/.github/workflows/commits.yml b/.github/workflows/commits.yml
index 16c557a36b7d..b18ee26e9c13 100644
--- a/.github/workflows/commits.yml
+++ b/.github/workflows/commits.yml
@@ -80,7 +80,6 @@ jobs:
# The action above is supposed to do this for us, but it doesn't appear to stick.
- run: /usr/bin/git config --global --add safe.directory /__w/suricata/suricata
- run: git fetch
- - run: git clone https://github.com/OISF/libhtp -b 0.5.x
- name: Building all commits
run: |
echo "Building commits from ${GITHUB_BASE_REF}."
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 56a94e64d5a4..2a16ea7051da 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -8,8 +8,6 @@ on:
pull_request:
workflow_dispatch:
inputs:
- LIBHTP_REPO:
- LIBHTP_BRANCH:
SU_REPO:
SU_BRANCH:
SV_REPO:
@@ -130,7 +128,6 @@ jobs:
cp prep/cbindgen $HOME/.cargo/bin
chmod 755 $HOME/.cargo/bin/cbindgen
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- - run: tar xf prep/libhtp.tar.gz
- run: tar xf prep/suricata-update.tar.gz
- run: ./autogen.sh
- run: CFLAGS="${DEFAULT_CFLAGS}" ./configure
diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml
index 0366f104ec89..d70eeea3f64d 100644
--- a/.github/workflows/formatting.yml
+++ b/.github/workflows/formatting.yml
@@ -128,7 +128,6 @@ jobs:
echo "No github merge commit found"
fi
shell: bash {0}
- - run: git clone https://github.com/OISF/libhtp -b 0.5.x
- run: ./autogen.sh
- run: ./configure --enable-warnings --enable-unittests
- name: Check formatting
diff --git a/.github/workflows/prepare-deps.yml b/.github/workflows/prepare-deps.yml
index 85b59687cda6..ffd72b8183ef 100644
--- a/.github/workflows/prepare-deps.yml
+++ b/.github/workflows/prepare-deps.yml
@@ -29,9 +29,6 @@ jobs:
echo "Parsing branch and PR info from:"
echo "${body}"
- LIBHTP_REPO=$(echo "${body}" | awk -F = '/^LIBHTP_REPO=/ { print $2 }')
- LIBHTP_BRANCH=$(echo "${body}" | awk -F = '/^LIBHTP_BRANCH=/ { print $2 }')
-
SU_REPO=$(echo "${body}" | awk -F = '/^SU_REPO=/ { print $2 }')
SU_BRANCH=$(echo "${body}" | awk -F = '/^SU_BRANCH=/ { print $2 }')
@@ -39,8 +36,6 @@ jobs:
SV_BRANCH=$(echo "${body}" | awk -F = '/^SV_BRANCH=/ { print $2 }')
else
echo "No pull request body, will use inputs or defaults."
- LIBHTP_REPO=${{ inputs.LIBHTP_REPO }}
- LIBHTP_BRANCH=${{ inputs.LIBHTP_BRANCH }}
SU_REPO=${{ inputs.SU_REPO }}
SU_BRANCH=${{ inputs.SU_BRANCH }}
SV_REPO=${{ inputs.SV_REPO }}
@@ -48,9 +43,6 @@ jobs:
fi
# If the _REPO variables don't contain a full URL, add GitHub.
- if [ "${LIBHTP_REPO}" ] && ! echo "${LIBHTP_REPO}" | grep -q '^https://'; then
- LIBHTP_REPO="https://github.com/${LIBHTP_REPO}"
- fi
if [ "${SU_REPO}" ] && ! echo "${SU_REPO}" | grep -q '^https://'; then
SU_REPO="https://github.com/${SU_REPO}"
fi
@@ -58,9 +50,6 @@ jobs:
SV_REPO="https://github.com/${SV_REPO}"
fi
- echo LIBHTP_REPO=${LIBHTP_REPO} | tee -a ${GITHUB_ENV}
- echo LIBHTP_BRANCH=${LIBHTP_BRANCH} | tee -a ${GITHUB_ENV}
-
echo SU_REPO=${SU_REPO} | tee -a ${GITHUB_ENV}
echo SU_BRANCH=${SU_BRANCH} | tee -a ${GITHUB_ENV}
@@ -69,8 +58,6 @@ jobs:
- name: Annotate output
run: |
- echo "::notice:: LIBHTP_REPO=${LIBHTP_REPO}"
- echo "::notice:: LIBHTP_BRANCH=${LIBHTP_BRANCH}"
echo "::notice:: SU_REPO=${SU_REPO}"
echo "::notice:: SU_BRANCH=${SU_BRANCH}"
echo "::notice:: SV_REPO=${SV_REPO}"
@@ -81,10 +68,6 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- run: git config --global --add safe.directory /__w/suricata/suricata
- - name: Fetching libhtp
- run: |
- DESTDIR=./bundle ./scripts/bundle.sh libhtp
- tar zcf libhtp.tar.gz -C bundle libhtp
- name: Fetching suricata-update
run: |
DESTDIR=./bundle ./scripts/bundle.sh suricata-update
@@ -116,6 +99,5 @@ jobs:
with:
name: prep
path: |
- libhtp.tar.gz
suricata-update.tar.gz
suricata-verify.tar.gz
diff --git a/.github/workflows/rust-checks.yml b/.github/workflows/rust-checks.yml
index 0a701ac6d847..2cab65011aa9 100644
--- a/.github/workflows/rust-checks.yml
+++ b/.github/workflows/rust-checks.yml
@@ -83,7 +83,6 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Configure Suricata
run: |
- ./scripts/bundle.sh libhtp
./autogen.sh
./configure --enable-warnings
- name: Run Cargo Audit
@@ -165,7 +164,6 @@ jobs:
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Configure Suricata
run: |
- ./scripts/bundle.sh libhtp
./autogen.sh
./configure --enable-warnings
- name: Cargo Update and Build
diff --git a/.github/workflows/scan-build.yml b/.github/workflows/scan-build.yml
index 366f5233ccd3..f1ad46ab1f3d 100644
--- a/.github/workflows/scan-build.yml
+++ b/.github/workflows/scan-build.yml
@@ -74,11 +74,10 @@ jobs:
- run: scan-build-18 ./configure --enable-warnings --enable-dpdk --enable-nfqueue --enable-nflog
env:
CC: clang-18
- # exclude libhtp from the analysis
# disable security.insecureAPI.DeprecatedOrUnsafeBufferHandling explicitly as
# this will require significant effort to address.
- run: |
- scan-build-18 --status-bugs --exclude libhtp/ --exclude rust \
+ scan-build-18 --status-bugs --exclude rust \
-enable-checker valist.Uninitialized \
-enable-checker valist.CopyToSelf \
-enable-checker valist.Unterminated \
diff --git a/.gitignore b/.gitignore
index 66416e27d14e..e214bbd78e69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,19 +32,6 @@ doc/userguide/suricata.1
etc/suricata.logrotate
etc/suricata.service
install-sh
-libhtp/TAGS
-libhtp/aclocal.m4
-libhtp/autom4te.cache/
-libhtp/config.h
-libhtp/config.log
-libhtp/config.status
-libhtp/configure
-libhtp/htp.pc
-libhtp/htp/TAGS
-libhtp/htp/libhtp.la
-libhtp/libtool
-libhtp/stamp-h1
-libhtp/test/TAGS
libtool
ltmain.sh
missing
diff --git a/Makefile.am b/Makefile.am
index 20e50bdc4a03..62ed549874bd 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -12,7 +12,7 @@ EXTRA_DIST = ChangeLog COPYING LICENSE suricata.yaml.in \
scripts/docs-ubuntu-debian-minimal-build.sh \
scripts/evedoc.py \
examples/plugins
-SUBDIRS = $(HTP_DIR) rust src plugins qa rules doc contrib etc python ebpf \
+SUBDIRS = rust src plugins qa rules doc contrib etc python ebpf \
$(SURICATA_UPDATE_DIR)
DIST_SUBDIRS = $(SUBDIRS) examples/lib/simple
@@ -53,7 +53,7 @@ endif
@echo "You can now start suricata by running as root something like:"
@echo " $(DESTDIR)$(bindir)/suricata -c $(DESTDIR)$(e_sysconfdir)suricata.yaml -i eth0"
@echo ""
- @echo "If a library like libhtp.so is not found, you can run suricata with:"
+ @echo "If a shared library is not found, you can add library paths with:"
@echo " LD_LIBRARY_PATH="$(DESTDIR)$(prefix)/lib" "$(DESTDIR)$(bindir)/suricata" -c "$(DESTDIR)$(e_sysconfdir)suricata.yaml" -i eth0"
@echo ""
@echo "The Emerging Threats Open rules are now installed. Rules can be"
diff --git a/configure.ac b/configure.ac
index ca964d9039a0..32cbff33b165 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1565,108 +1565,6 @@
[test "x$install_suricata_update" = "xyes"])
AC_SUBST([install_suricata_update_reason])
- # libhtp
- AC_ARG_ENABLE(non-bundled-htp,
- AS_HELP_STRING([--enable-non-bundled-htp], [Enable the use of an already installed version of htp]),[enable_non_bundled_htp=$enableval],[enable_non_bundled_htp=no])
- AS_IF([test "x$enable_non_bundled_htp" = "xyes"], [
- PKG_CHECK_MODULES([libhtp], htp,, [with_pkgconfig_htp=no])
- if test "$with_pkgconfig_htp" != "no"; then
- CPPFLAGS="${CPPFLAGS} ${libhtp_CFLAGS}"
- LIBS="${LIBS} ${libhtp_LIBS}"
- fi
-
- AC_ARG_WITH(libhtp_includes,
- [ --with-libhtp-includes=DIR libhtp include directory],
- [with_libhtp_includes="$withval"],[with_libhtp_includes=no])
- AC_ARG_WITH(libhtp_libraries,
- [ --with-libhtp-libraries=DIR libhtp library directory],
- [with_libhtp_libraries="$withval"],[with_libhtp_libraries="no"])
-
- if test "$with_libhtp_includes" != "no"; then
- CPPFLAGS="-I${with_libhtp_includes} ${CPPFLAGS}"
- fi
-
- if test "$with_libhtp_libraries" != "no"; then
- LDFLAGS="${LDFLAGS} -L${with_libhtp_libraries}"
- fi
-
- AC_CHECK_HEADER(htp/htp.h,,[AC_MSG_ERROR(htp/htp.h not found ...)])
-
- LIBHTP=""
- AC_CHECK_LIB(htp, htp_conn_create,, LIBHTP="no")
- if test "$LIBHTP" = "no"; then
- echo
- echo " ERROR! libhtp library not found"
- echo
- exit 1
- fi
- PKG_CHECK_MODULES(LIBHTPMINVERSION, [htp >= 0.5.45],[libhtp_minver_found="yes"],[libhtp_minver_found="no"])
- if test "$libhtp_minver_found" = "no"; then
- PKG_CHECK_MODULES(LIBHTPDEVVERSION, [htp = 0.5.X],[libhtp_devver_found="yes"],[libhtp_devver_found="no"])
- if test "$libhtp_devver_found" = "no"; then
- echo
- echo " ERROR! libhtp was found but it is neither >= 0.5.45, nor the dev 0.5.X"
- echo
- exit 1
- fi
- fi
-
- AC_CHECK_LIB([htp], [htp_config_register_request_uri_normalize],AC_DEFINE_UNQUOTED([HAVE_HTP_URI_NORMALIZE_HOOK],[1],[Found htp_config_register_request_uri_normalize function in libhtp]) ,,[-lhtp])
- # check for htp_tx_get_response_headers_raw
- AC_CHECK_LIB([htp], [htp_tx_get_response_headers_raw],AC_DEFINE_UNQUOTED([HAVE_HTP_TX_GET_RESPONSE_HEADERS_RAW],[1],[Found htp_tx_get_response_headers_raw in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_decode_query_inplace],AC_DEFINE_UNQUOTED([HAVE_HTP_DECODE_QUERY_INPLACE],[1],[Found htp_decode_query_inplace function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_response_decompression_layer_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT],[1],[Found htp_config_set_response_decompression_layer_limit function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_allow_space_uri],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI],[1],[Found htp_config_set_allow_space_uri function in libhtp]) ,,[-lhtp])
- AC_EGREP_HEADER(htp_config_set_path_decode_u_encoding, htp/htp.h, AC_DEFINE_UNQUOTED([HAVE_HTP_SET_PATH_DECODE_U_ENCODING],[1],[Found usable htp_config_set_path_decode_u_encoding function in libhtp]) )
- AC_CHECK_LIB([htp], [htp_config_set_lzma_memlimit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT],[1],[Found htp_config_set_lzma_memlimit function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_lzma_layers],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_LAYERS],[1],[Found htp_config_set_lzma_layers function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_compression_bomb_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT],[1],[Found htp_config_set_compression_bomb_limit function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_compression_time_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT],[1],[Found htp_config_set_compression_time_limit function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_max_tx],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_MAX_TX],[1],[Found htp_config_set_max_tx function in libhtp]) ,,[-lhtp])
- AC_CHECK_LIB([htp], [htp_config_set_number_headers_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_HEADERS_LIMIT],[1],[Found htp_config_set_number_headers_limit function in libhtp]) ,,[-lhtp])
- ])
-
- if test "x$enable_non_bundled_htp" = "xno"; then
- # test if we have a bundled htp
- if test -d "$srcdir/libhtp"; then
- AC_CONFIG_SUBDIRS([libhtp])
- HTP_DIR="libhtp"
- AC_SUBST(HTP_DIR)
- HTP_LDADD="../libhtp/htp/libhtp.la"
- AC_SUBST(HTP_LDADD)
- # make sure libhtp is added to the includes
- CPPFLAGS="-I\${srcdir}/../libhtp/ ${CPPFLAGS}"
-
- AC_CHECK_HEADER(iconv.h,,[AC_MSG_ERROR(iconv.h not found ...)])
- AC_CHECK_LIB(iconv, libiconv_close)
- AC_DEFINE_UNQUOTED([HAVE_HTP_URI_NORMALIZE_HOOK],[1],[Assuming htp_config_register_request_uri_normalize function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_TX_GET_RESPONSE_HEADERS_RAW],[1],[Assuming htp_tx_get_response_headers_raw function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_DECODE_QUERY_INPLACE],[1],[Assuming htp_decode_query_inplace function in bundled libhtp])
- # enable when libhtp has been updated
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT],[1],[Assuming htp_config_set_response_decompression_layer_limit function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI],[1],[Assuming htp_config_set_allow_space_uri function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT],[1],[Assuming htp_config_set_lzma_memlimit function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_LAYERS],[1],[Assuming htp_config_set_lzma_layers function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT],[1],[Assuming htp_config_set_compression_bomb_limit function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT],[1],[Assuming htp_config_set_compression_time_limit function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_MAX_TX],[1],[Assuming htp_config_set_max_tx function in bundled libhtp])
- AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_HEADERS_LIMIT],[1],[Assuming htp_config_set_number_headers_limit function in bundled libhtp])
- else
- echo
- echo " ERROR: Libhtp is not bundled. Get libhtp by doing:"
- echo " git clone https://github.com/OISF/libhtp"
- echo " Then re-run Suricata's autogen.sh and configure script."
- echo " Or, if libhtp is installed in a different location,"
- echo " pass --enable-non-bundled-htp to Suricata's configure script."
- echo " Add --with-libhtp-includes=<dir> and --with-libhtp-libraries=<dir> if"
- echo " libhtp is not installed in the include and library paths."
- echo
- exit 1
- fi
- fi
-
- AM_CONDITIONAL([HTP_LDADD], [test "x${HTP_LDADD}" != "x"])
-
# Check for libcap-ng
case $host in
*-*-linux*)
@@ -2511,7 +2409,6 @@ AC_SUBST(MAJOR_MINOR)
AC_SUBST(RUST_FEATURES)
AC_SUBST(RUST_SURICATA_LIBDIR)
AC_SUBST(RUST_SURICATA_LIBNAME)
-AC_SUBST(enable_non_bundled_htp)
AM_CONDITIONAL([BUILD_SHARED_LIBRARY], [test "x$enable_shared" = "xyes"] && [test "x$can_build_shared_library" = "xyes"])
@@ -2559,7 +2456,6 @@ SURICATA_BUILD_CONF="Suricata Configuration:
GeoIP2 support: ${enable_geoip}
JA3 support: ${enable_ja3}
JA4 support: ${enable_ja4}
- Non-bundled htp: ${enable_non_bundled_htp}
Hyperscan support: ${enable_hyperscan}
Libnet support: ${enable_libnet}
liblz4 support: ${enable_liblz4}
diff --git a/doc/userguide/devguide/codebase/installation-from-git.rst b/doc/userguide/devguide/codebase/installation-from-git.rst
index 9d7a45a54392..373a6e4fe968 100644
--- a/doc/userguide/devguide/codebase/installation-from-git.rst
+++ b/doc/userguide/devguide/codebase/installation-from-git.rst
@@ -72,7 +72,7 @@ Next, enter the following line in the terminal:
git clone https://github.com/OISF/suricata.git
cd suricata
-Libhtp and suricata-update are not bundled. Get them by doing:
+Suricata-update is not bundled. Get it by doing:
.. code-block:: bash
diff --git a/doc/userguide/devguide/codebase/testing.rst b/doc/userguide/devguide/codebase/testing.rst
index c712e90a99b8..41cd88c81047 100644
--- a/doc/userguide/devguide/codebase/testing.rst
+++ b/doc/userguide/devguide/codebase/testing.rst
@@ -30,7 +30,7 @@ Use these to check that specific functions behave as expected, in success and in
during development, for nom parsers in the Rust codebase, for instance, or for checking that messages
or message parts of a protocol/stream are processed as they should.
-To execute all unit tests (both from C and Rust code), as well as ``libhtp`` ones, from the Suricata main directory, run::
+To execute all unit tests (both from C and Rust code) from the Suricata main directory, run::
make check
diff --git a/doc/userguide/lua/lua-functions.rst b/doc/userguide/lua/lua-functions.rst
index 92473d52c35e..f74d845b6c13 100644
--- a/doc/userguide/lua/lua-functions.rst
+++ b/doc/userguide/lua/lua-functions.rst
@@ -231,7 +231,7 @@ Example:
HttpGetRequestHost
~~~~~~~~~~~~~~~~~~
-Get the host from libhtp's tx->request_hostname, which can either be
+Get the host from libhtp's htp_tx_request_hostname(tx), which can either be
the host portion of the url or the host portion of the Host header.
Example:
diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst
index 63e2146280ab..8adae3730021 100644
--- a/doc/userguide/upgrade.rst
+++ b/doc/userguide/upgrade.rst
@@ -97,6 +97,10 @@ Logging changes
- RFB security result is now consistently logged as ``security_result`` when it was
sometimes logged with a dash instead of an underscore.
+Other Changes
+~~~~~~~~~~~~~
+- libhtp has been replaced with a Rust version. This means libhtp is no longer built and linked as a shared library; the libhtp dependency is now built directly into Suricata.
+
Upgrading 6.0 to 7.0
--------------------
diff --git a/doxygen.cfg b/doxygen.cfg
index 22fc4543a34d..c110daa439a5 100644
--- a/doxygen.cfg
+++ b/doxygen.cfg
@@ -829,7 +829,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
-INPUT = src/ libhtp/htp/ examples/
+INPUT = src/ examples/
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/examples/lib/simple/Makefile.am b/examples/lib/simple/Makefile.am
index c4004b9446c9..32821827392a 100644
--- a/examples/lib/simple/Makefile.am
+++ b/examples/lib/simple/Makefile.am
@@ -6,7 +6,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/src
simple_LDFLAGS = $(all_libraries) $(SECLDFLAGS)
simple_LDADD = "-Wl,--start-group,$(top_builddir)/src/libsuricata_c.a,../../$(RUST_SURICATA_LIB),--end-group" $(RUST_LDADD)
-if HTP_LDADD
-simple_LDADD += ../../$(HTP_LDADD)
-endif
simple_DEPENDENCIES = $(top_builddir)/src/libsuricata_c.a ../../$(RUST_SURICATA_LIB)
diff --git a/libsuricata-config.in b/libsuricata-config.in
index 1fabe0765268..94bbf9a81b98 100644
--- a/libsuricata-config.in
+++ b/libsuricata-config.in
@@ -9,8 +9,6 @@ LIBS="@LIBS@ @RUST_LDADD@"
shared_lib="-lsuricata"
static_lib="-lsuricata_c -lsuricata_rust"
-enable_non_bundled_htp="@enable_non_bundled_htp@"
-
lib="$shared_lib"
show_libs="no"
@@ -47,12 +45,6 @@ if [ "$use_static" = "no" ]; then
fi
fi
-# If we're using a bundled htp, add it to the libs as well. It will
-# already be present if we're use a non-bundled libhtp.
-if [ "$enable_non_bundled_htp" = "no" ]; then
- lib="${lib} -lhtp"
-fi
-
output=""
if [ "$show_cflags" = "yes" ]; then
diff --git a/requirements.txt b/requirements.txt
index 6df1358f075f..537f896bfd79 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
-# Specify libhtp and suricata-update requirements.
+# Specify suricata-update requirements.
#
# Format:
#
# name {repo} {branch|tag}
-libhtp https://github.com/OISF/libhtp 0.5.x
suricata-update https://github.com/OISF/suricata-update master
diff --git a/rules/http-events.rules b/rules/http-events.rules
index e08d84eb46cd..7e4d1fd258e0 100644
--- a/rules/http-events.rules
+++ b/rules/http-events.rules
@@ -96,4 +96,5 @@ alert http any any -> any any (msg:"SURICATA HTTP request missing protocol"; flo
alert http any any -> any any (msg:"SURICATA HTTP request too many headers"; flow:established,to_server; app-layer-event:http.request_too_many_headers; classtype:protocol-command-decode; sid:2221056; rev:1;)
alert http any any -> any any (msg:"SURICATA HTTP response too many headers"; flow:established,to_client; app-layer-event:http.response_too_many_headers; classtype:protocol-command-decode; sid:2221057; rev:1;)
-# next sid 2221058
+#alert http any any -> any any (msg:"SURICATA HTTP response chunk extension"; flow:established; app-layer-event:http.response_chunk_extension; classtype:protocol-command-decode; sid:2221058; rev:1;)
+# next sid 2221059
diff --git a/rust/Cargo.toml.in b/rust/Cargo.toml.in
index eaeacff8356e..80a3d78662e2 100644
--- a/rust/Cargo.toml.in
+++ b/rust/Cargo.toml.in
@@ -7,7 +7,7 @@ edition = "2021"
rust-version = "1.67.1"
[workspace]
-members = [".", "./derive"]
+members = [".", "./derive", "./htp"]
[lib]
crate-type = ["staticlib", "rlib"]
@@ -71,5 +71,7 @@ suricata-derive = { path = "./derive", version = "@PACKAGE_VERSION@" }
suricata-lua-sys = { version = "0.1.0-alpha.5" }
+htp = { path = "./htp", version = "2.0.0" }
+
[dev-dependencies]
test-case = "~3.3.1"
diff --git a/rust/Makefile.am b/rust/Makefile.am
index d53eb97090e1..2482d8f31ac6 100644
--- a/rust/Makefile.am
+++ b/rust/Makefile.am
@@ -1,10 +1,12 @@
-EXTRA_DIST = src derive \
+EXTRA_DIST = src derive htp \
.cargo/config.toml.in \
cbindgen.toml \
dist/rust-bindings.h \
+ dist/htp/htp_rs.h \
vendor \
Cargo.toml Cargo.lock \
- derive/Cargo.toml
+ derive/Cargo.toml \
+ htp/Cargo.toml
if !DEBUG
RELEASE = --release
@@ -61,6 +63,7 @@ all-local: Cargo.toml
$(RUST_SURICATA_LIBDIR)/${RUST_SURICATA_LIBNAME}; \
fi
$(MAKE) gen/rust-bindings.h
+ $(MAKE) gen/htp/htp_rs.h
install-library:
$(MKDIR_P) "$(DESTDIR)$(libdir)"
@@ -92,6 +95,15 @@ else
gen/rust-bindings.h:
endif
+if HAVE_CBINDGEN
+gen/htp/htp_rs.h: $(RUST_SURICATA_LIB)
+ cd $(abs_top_srcdir)/rust/htp && \
+ cbindgen --config $(abs_top_srcdir)/rust/htp/cbindgen.toml \
+ --quiet --verify --output $(abs_top_builddir)/rust/gen/htp/htp_rs.h || true
+else
+gen/htp/htp_rs.h:
+endif
+
doc:
CARGO_HOME=$(CARGO_HOME) $(CARGO) doc --all-features --no-deps
@@ -103,6 +115,15 @@ else
dist/rust-bindings.h:
endif
+if HAVE_CBINDGEN
+dist/htp/htp_rs.h:
+ cd $(abs_top_srcdir)/rust/htp && \
+ cbindgen --config cbindgen.toml \
+ --quiet --output $(abs_top_builddir)/rust/dist/htp/htp_rs.h
+else
+dist/htp/htp_rs.h:
+endif
+
Cargo.toml: Cargo.toml.in
update-lock: Cargo.toml
diff --git a/rust/htp/.gitignore b/rust/htp/.gitignore
new file mode 100644
index 000000000000..01c356623241
--- /dev/null
+++ b/rust/htp/.gitignore
@@ -0,0 +1 @@
+!Cargo.toml
diff --git a/rust/htp/Cargo.toml b/rust/htp/Cargo.toml
new file mode 100644
index 000000000000..10b4d0eca043
--- /dev/null
+++ b/rust/htp/Cargo.toml
@@ -0,0 +1,44 @@
+[package]
+name = "htp"
+authors = ["ivanr = Ivan Ristic ", "cccs = Canadian Centre for Cyber Security"]
+version = "2.0.0"
+publish = false
+edition = "2018"
+autobins = false
+license-file = "LICENSE"
+description = "Security Aware HTP Protocol parsing library"
+readme = "README.md"
+repository = "https://github.com/CybercentreCanada/libhtp-rs-internal"
+homepage = "https://github.com/CybercentreCanada/libhtp-rs-internal"
+keywords = ["parser", "HTTP", "protocol", "network", "api"]
+categories = ["parsing", "network-programming"]
+include = [
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+ "src/**/*.rs",
+ "cbindgen.toml",
+]
+
+[lib]
+crate-type = ["staticlib", "rlib", "cdylib"]
+
+[features]
+default = ["cbindgen"]
+
+[dependencies]
+base64 = "0.12.3"
+bstr = "0.2"
+libc = "0.2"
+nom = "7.1.1"
+lzma-rs = { version = "0.2.0", features = ["stream"] }
+flate2 = { version = "~1.0.19", features = ["zlib"], default-features = false }
+lazy_static = "1.4.0"
+time = "=0.3.36"
+
+[dev-dependencies]
+rstest = "0.12.0"
+
+[build-dependencies]
+cbindgen = { version = "0.14.1", optional = true }
+cdylib-link-lines = "0.1.1"
diff --git a/rust/htp/LICENSE b/rust/htp/LICENSE
new file mode 100644
index 000000000000..3d4227e3a2da
--- /dev/null
+++ b/rust/htp/LICENSE
@@ -0,0 +1,31 @@
+Copyright (c) 2009-2010 Open Information Security Foundation
+Copyright (c) 2010-2013 Qualys, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+- Neither the name of the Qualys, Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/rust/htp/README.md b/rust/htp/README.md
new file mode 100644
index 000000000000..b7ceea73f33b
--- /dev/null
+++ b/rust/htp/README.md
@@ -0,0 +1,67 @@
+# LibHTP
+
+---
+
+Copyright 2009-2010 Open Information Security Foundation
+Copyright 2010-2013 Qualys, Inc.
+
+---
+
+LibHTP is a security-aware parser for the HTTP protocol and the related bits
+and pieces. The goals of the project, in the order of importance, are as
+follows:
+
+ 1. Completeness of coverage; LibHTP must be able to parse virtually all
+ traffic that is found in practice.
+
+ 2. Permissive parsing; LibHTP must never fail to parse a stream that would
+ be parsed by some other web server.
+
+ 3. Awareness of evasion techniques; LibHTP must be able to detect and
+ effectively deal with various evasion techniques, producing, where
+ practical, identical or practically identical results as the web
+ server processing the same traffic stream.
+
+ 4. Performance; The performance must be adequate for the desired tasks.
+ Completeness and security are often detrimental to performance. Our
+ idea of handling the conflicting requirements is to put the library
+ user in control, allowing him to choose the most desired library
+ characteristic.
+
+ | STATUS LIBHTP IS VERY YOUNG AT THIS POINT. IT WILL BE SOME TIME BEFORE
+ | IT CAN BE CONSIDERED COMPLETE. AT THE MOMENT, THE FOCUS OF DEVELOPMENT
+ | IS ON ACHIEVING THE FIRST TWO GOALS.
+
+See the LICENSE file distributed with this work for information
+regarding licensing, copying and copyright ownership.
+
+
+# Usage
+Start using libHTP by including it in your project's `Cargo.toml`
+dependencies. The base library will also be required for using common
+types.
+
+**The minimum supported version of `rustc` is `1.58.1`.**
+
+## Example
+```
+[dependencies]
+htp = "2.0.0"
+```
+
+## FFI Support
+LibHTP has a foreign function interface for use in C/C++ projects.
+FFI Support can be enabled by building with the `cbindgen` feature.
+
+```
+# Install cbindgen which is required to generate headers
+cargo install --force cbindgen
+
+# Build headers and shared objects
+make
+```
+
+## LICENSE
+
+LibHTP is licensed under the BSD 3-Clause license (also known as "New BSD" or
+"Modified BSD"). The complete text of the license is enclosed in the file LICENSE.
diff --git a/rust/htp/cbindgen.toml b/rust/htp/cbindgen.toml
new file mode 100644
index 000000000000..8d132e07383a
--- /dev/null
+++ b/rust/htp/cbindgen.toml
@@ -0,0 +1,95 @@
+language = "C"
+
+# Header wrapping options
+#header = "LICENSE here"
+#trailer = ""
+include_guard = "_HTP_H"
+autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Do NOT modify manually */"
+#include_version = true
+#sys_includes = [] # Sys headers
+includes = []
+no_includes = false
+cpp_compat = true
+#after_includes = ""
+
+# Code style
+#braces = "SameLine"
+#line_length = 100
+#tab_wideth = 2
+#documentation_style = auto
+
+# Codegen
+style = "both"
+
+after_includes = """
+#define htp_status_t HtpStatus
+#define htp_server_personality_t HtpServerPersonality
+#define htp_protocol_t HtpProtocol
+#define htp_unwanted_t HtpUnwanted
+#define htp_url_encoding_handling_t HtpUrlEncodingHandling
+#define htp_stream_state_t HtpStreamState
+#define htp_content_encoding_t HtpContentEncoding
+#define htp_log_code_t HtpLogCode
+#define htp_log_level_t HtpLogLevel
+#define htp_method_t HtpMethod
+#define htp_data_source_t HtpDataSource
+#define htp_parser_id_t HtpParserId
+#define htp_transfer_coding_t HtpTransferCoding
+#define htp_res_progress_t HtpResponseProgress
+#define htp_req_progress_t HtpRequestProgress
+"""
+
+[export.rename]
+"ConnectionFlags" = "HTP_CONNECTION_FLAGS"
+"HeaderFlags" = "HTP_HEADER_FLAGS"
+"HtpFlags" = "HTP_FLAGS"
+"Config" = "htp_cfg_t"
+"Connection" = "htp_conn_t"
+"ConnectionParser" = "htp_connp_t"
+"Header" = "htp_header_t"
+"Headers" = "htp_headers_t"
+"Param" = "htp_param_t"
+"Data" = "htp_tx_data_t"
+"Transaction" = "htp_tx_t"
+"Transactions" = "htp_txs_t"
+"Uri" = "htp_uri_t"
+"Bstr" = "bstr"
+"Table" = "htp_table_t"
+"Log" = "htp_log_t"
+"timeval" = "struct timeval"
+"Logs" = "htp_logs_t"
+
+[export]
+include = ["HtpStatus",
+"HtpServerPersonality",
+"HtpProtocol",
+"HtpUnwanted",
+"HtpUrlEncodingHandling",
+"HtpStreamState",
+"HtpContentEncoding",
+"HtpLogCode",
+"HtpLogLevel",
+"HtpMethod",
+"HtpDataSource",
+"HtpParserId",
+"HtpTransferCoding",
+"HtpResponseProgress",
+"HtpRequestProgress",
+"HtpFlags",
+"HeaderFlags",
+"ConnectionFlags"]
+
+[enum]
+rename_variants = "QualifiedScreamingSnakeCase"
+prefix_with_name = false
+
+[macro_expansion]
+bitflags = true
+
+# Rust parsing options
+[parse]
+parse_deps = false
+clean = false
+
+[parse.expand]
+features = ["cbindgen"]
diff --git a/rust/htp/fuzz/Cargo.toml b/rust/htp/fuzz/Cargo.toml
new file mode 100644
index 000000000000..4ef6ca68c330
--- /dev/null
+++ b/rust/htp/fuzz/Cargo.toml
@@ -0,0 +1,25 @@
+
+[package]
+name = "htp-fuzz"
+version = "0.0.1"
+authors = ["Automatically generated"]
+publish = false
+edition = "2018"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies.htp]
+path = ".."
+[dependencies.libfuzzer-sys]
+git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
+
+[dependencies]
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "fuzz_htp_rs"
+path = "fuzz_targets/fuzz_htp.rs"
diff --git a/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs b/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs
new file mode 100644
index 000000000000..fb8ff542e488
--- /dev/null
+++ b/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs
@@ -0,0 +1,14 @@
+#![allow(non_snake_case)]
+#![no_main]
+#[macro_use] extern crate libfuzzer_sys;
+
+extern crate htp;
+
+use htp::test::{Test, TestConfig};
+use std::env;
+
+
+// Fuzz entry point: every input buffer is run through a freshly constructed `Test`
+// harness built from `TestConfig()`; whatever `run_slice` returns is discarded.
+// NOTE(review): presumably the goal is to surface panics in the parser — confirm.
+fuzz_target!(|data: &[u8]| {
+ let mut t = Test::new(TestConfig());
+ t.run_slice(data);
+});
diff --git a/rust/htp/src/bstr.rs b/rust/htp/src/bstr.rs
new file mode 100644
index 000000000000..2d85f3f125e0
--- /dev/null
+++ b/rust/htp/src/bstr.rs
@@ -0,0 +1,482 @@
+use bstr::{BString, ByteSlice};
+use core::cmp::Ordering;
+use std::ops::{Deref, DerefMut};
+
+/// Bstr is a convenience wrapper around binary data that adds string-like functions.
+///
+/// The contents are arbitrary bytes: the constructors in this file accept any `&[u8]`
+/// or `Vec<u8>`, so embedded NUL bytes and non-UTF-8 data are allowed.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Bstr {
+ // Wrap a BString under the hood. We want to be able to
+ // implement behaviours on top of this if needed, so we wrap
+ // it instead of exposing it directly in our public API.
+ s: BString,
+}
+
+impl Default for Bstr {
+    /// The default Bstr is the same empty byte string that [`Bstr::new`] produces.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Bstr {
+    /// Make a new owned, empty Bstr.
+    pub fn new() -> Self {
+        Bstr {
+            s: BString::from(Vec::new()),
+        }
+    }
+
+    /// Make a new owned, empty Bstr with room reserved for at least `len` bytes.
+    pub fn with_capacity(len: usize) -> Self {
+        Bstr {
+            s: BString::from(Vec::with_capacity(len)),
+        }
+    }
+
+    /// Split the Bstr into a collection of substrings, separated by the given byte string.
+    /// Each element yielded is guaranteed not to include the splitter substring.
+    /// Returns a Vector of the substrings.
+    pub fn split_str_collect<'b, B: ?Sized + AsRef<[u8]>>(
+        &'b self, splitter: &'b B,
+    ) -> Vec<&'b [u8]> {
+        self.s.as_bstr().split_str(splitter.as_ref()).collect()
+    }
+
+    /// Compare this bstr with the given slice, byte for byte (case sensitive).
+    pub fn cmp_slice<B: AsRef<[u8]>>(&self, other: B) -> Ordering {
+        self.as_slice().cmp(other.as_ref())
+    }
+
+    /// Return true if self is equal to other
+    pub fn eq_slice<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        self.cmp_slice(other) == Ordering::Equal
+    }
+
+    /// Compare bstr with the given slice, ignoring ascii case.
+    pub fn cmp_nocase<B: AsRef<[u8]>>(&self, other: B) -> Ordering {
+        let lefts = &self.as_slice();
+        let rights = &other.as_ref();
+        let left = LowercaseIterator::new(lefts);
+        let right = LowercaseIterator::new(rights);
+        left.cmp(right)
+    }
+
+    /// Compare trimmed bstr with the given slice, ignoring ascii case.
+    /// Only self is trimmed (of leading and trailing ascii whitespace); other is used as is.
+    pub fn cmp_nocase_trimmed<B: AsRef<[u8]>>(&self, other: B) -> Ordering {
+        let lefts = &self.trim_with(|c| c.is_ascii_whitespace());
+        let rights = &other.as_ref();
+        let left = LowercaseIterator::new(lefts);
+        let right = LowercaseIterator::new(rights);
+        left.cmp(right)
+    }
+
+    /// Return true if self is equal to other ignoring ascii case
+    pub fn eq_nocase<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        self.cmp_nocase(other) == Ordering::Equal
+    }
+
+    /// Case insensitive comparison between self and other, ignoring any zeros in self.
+    /// Zero bytes in other are NOT skipped.
+    pub fn cmp_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> Ordering {
+        let lefts = &self.as_slice();
+        let rights = &other.as_ref();
+        let left = LowercaseNoZeroIterator::new(lefts);
+        let right = LowercaseIterator::new(rights);
+        left.cmp(right)
+    }
+
+    /// Case insensitive comparison between trimmed self and other, ignoring any zeros in self
+    // NOTE(review): this trims with `trim()` while `cmp_nocase_trimmed` trims ascii
+    // whitespace only via `trim_with` — confirm the difference is intended.
+    pub fn cmp_nocase_nozero_trimmed<B: AsRef<[u8]>>(&self, other: B) -> Ordering {
+        let lefts = &self.trim();
+        let rights = &other.as_ref();
+        let left = LowercaseNoZeroIterator::new(lefts);
+        let right = LowercaseIterator::new(rights);
+        left.cmp(right)
+    }
+
+    /// Return true if self is equal to other, ignoring ascii case and zeros in self
+    pub fn eq_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        self.cmp_nocase_nozero(other) == Ordering::Equal
+    }
+
+    /// Extend this bstr with the given slice
+    pub fn add<B: AsRef<[u8]>>(&mut self, other: B) {
+        self.extend_from_slice(other.as_ref())
+    }
+
+    /// Extend the bstr as much as possible without growing.
+    /// Bytes of other that do not fit into the spare capacity are dropped.
+    pub fn add_noex<B: AsRef<[u8]>>(&mut self, other: B) {
+        let extra = other.as_ref();
+        let len = std::cmp::min(self.capacity() - self.len(), extra.len());
+        self.add(&extra[..len]);
+    }
+
+    /// Return true if this bstr starts with other
+    pub fn starts_with<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        self.as_slice().starts_with(other.as_ref())
+    }
+
+    /// Return true if this bstr starts with other, ignoring ascii case
+    pub fn starts_with_nocase<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        let prefix = other.as_ref();
+        // A prefix longer than self can never match; checking this first also
+        // keeps the slice below in bounds.
+        if self.len() < prefix.len() {
+            return false;
+        }
+        self.as_slice()[..prefix.len()].eq_ignore_ascii_case(prefix)
+    }
+
+    /// Find the index of the given slice
+    pub fn index_of<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> {
+        self.find(other.as_ref())
+    }
+
+    /// Find the index of the given slice ignoring ascii case
+    pub fn index_of_nocase<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> {
+        let src = self.as_slice();
+        let mut haystack = LowercaseIterator::new(&src);
+        // The haystack iterator yields lowercase bytes, so lowercase the needle too.
+        let needle = other.as_ref().to_ascii_lowercase();
+        haystack.index_of(&needle)
+    }
+
+    /// Find the index of the given slice ignoring ascii case and any zeros in self.
+    /// The returned index counts the skipped zero bytes, i.e. it is an offset into self.
+    pub fn index_of_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> {
+        let src = self.as_slice();
+        let mut haystack = LowercaseNoZeroIterator::new(&src);
+        // The haystack iterator yields lowercase bytes, so lowercase the needle too.
+        let needle = other.as_ref().to_ascii_lowercase();
+        haystack.index_of(&needle)
+    }
+}
+
+// Trait Implementations for Bstr
+
+/// Let callers access BString functions
+///
+/// Dereferencing a `Bstr` yields a shared reference to the wrapped `BString`.
+impl Deref for Bstr {
+ type Target = BString;
+
+ fn deref(&self) -> &Self::Target {
+ &self.s
+ }
+}
+
+/// Let callers access mutable BString functions
+///
+/// Mutably dereferencing a `Bstr` yields an exclusive reference to the wrapped `BString`.
+impl DerefMut for Bstr {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.s
+ }
+}
+
+impl From<&[u8]> for Bstr {
+    /// Copy the given bytes into a new owned Bstr.
+    fn from(src: &[u8]) -> Self {
+        let s = BString::from(src);
+        Self { s }
+    }
+}
+
+impl From<&str> for Bstr {
+    /// Copy the bytes of `src` into a new owned Bstr.
+    fn from(src: &str) -> Self {
+        Bstr::from(src.as_bytes())
+    }
+}
+
+impl From<Vec<u8>> for Bstr {
+    /// Take ownership of the given byte vector and wrap it in a Bstr.
+    fn from(src: Vec<u8>) -> Self {
+        Bstr {
+            s: BString::from(src),
+        }
+    }
+}
+
+/// Byte-for-byte equality between a Bstr and a &str
+impl PartialEq<&str> for Bstr {
+    fn eq(&self, rhs: &&str) -> bool {
+        let lhs: &[u8] = self.as_slice();
+        lhs == rhs.as_bytes()
+    }
+}
+
+/// A trait that lets us find the byte index of slices in a generic way.
+///
+/// This layer of abstraction is motivated by the need to find needle in
+/// haystack when we want to perform case sensitive, case insensitive, and
+/// case insensitive + zero skipping. All of these algorithms are identical
+/// except we compare the needle bytes with the src bytes in different ways,
+/// and in the case of zero skipping we want to pretend that zero bytes in
+/// the haystack do not exist. So we define iterators for each of lowercase
+/// and lowercase + zero skipping, and then implement this trait for both of
+/// those, and then define the search function in terms of this trait.
+trait SubIterator: Iterator<Item = u8> {
+    /// Return a new iterator of the same type starting at the current byte index
+    fn subiter(&self) -> Self;
+    /// Return the current byte index into the iterator
+    fn index(&self) -> usize;
+    /// Find the given needle in self and return the byte index
+    fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize>;
+}
+
+/// Find the byte index of the given slice in the source.
+///
+/// The needle bytes are compared as-is against the bytes yielded by `haystack`, so
+/// callers lowercase the needle when the haystack iterator yields lowercase bytes.
+/// Returns the index (as reported by `SubIterator::index`) at which the first match
+/// starts, or `None` if there is no match or the needle is empty.
+///
+/// Someday an enterprising soul can implement this function inside SubIterator
+/// directly (where it arguably belongs), but this involves handling dyn Self,
+/// and implementing it this way lets monomorphization emit concrete
+/// implementations for each of the two types we actually have.
+fn index_of<T: SubIterator, S: AsRef<[u8]>>(haystack: &mut T, needle: &S) -> Option<usize> {
+    let needle = needle.as_ref();
+    let first = *needle.first()?;
+    // `for` cannot be used here: the loop body needs `haystack` itself.
+    while let Some(byte) = haystack.next() {
+        if byte == first {
+            // Candidate match: replay from the current position on a separate
+            // iterator so the main scan position is left untouched.
+            let mut candidate = haystack.subiter();
+            if needle.iter().all(|&expected| candidate.next() == Some(expected)) {
+                return Some(haystack.index());
+            }
+        }
+    }
+    None
+}
+
+/// A convenience iterator for anything that satisfies AsRef<[u8]>
+/// that yields lowercase ascii bytes and skips null bytes
+struct LowercaseNoZeroIterator<'a, T: AsRef<[u8]>> {
+ // Borrowed byte source being iterated.
+ src: &'a T,
+ // Index into `src` of the byte read by the most recent `next()` call
+ // (or the starting index while `first` is still true).
+ idx: usize,
+ // True until the first `next()` call, which reads `idx` without advancing it.
+ first: bool,
+}
+
+impl<'a, T: AsRef<[u8]>> LowercaseNoZeroIterator<'a, T> {
+    /// Create an iterator positioned at the first byte of `src`.
+    fn new(src: &'a T) -> Self {
+        Self {
+            first: true,
+            idx: 0,
+            src,
+        }
+    }
+}
+
+impl<T: AsRef<[u8]>> Iterator for LowercaseNoZeroIterator<'_, T> {
+    type Item = u8;
+
+    /// Yield the next non-zero byte of the source, lowercased, or `None` at the end.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // The very first call reads the starting index; every later step
+            // (including steps over skipped zero bytes) advances by one.
+            if self.first {
+                self.first = false;
+            } else {
+                self.idx += 1;
+            }
+            let next = self
+                .src
+                .as_ref()
+                .get(self.idx)
+                .map(|c| c.to_ascii_lowercase());
+            // Zero bytes are skipped; anything else (including end of input) is returned.
+            if next != Some(0) {
+                break next;
+            }
+        }
+    }
+}
+
+impl<T: AsRef<[u8]>> SubIterator for LowercaseNoZeroIterator<'_, T> {
+    /// New iterator over the same source whose first `next()` re-reads the current index.
+    fn subiter(&self) -> Self {
+        LowercaseNoZeroIterator {
+            src: self.src,
+            idx: self.idx,
+            first: true,
+        }
+    }
+
+    /// Current index into the source bytes (skipped zero bytes are counted).
+    fn index(&self) -> usize {
+        self.idx
+    }
+
+    /// Delegates to the free function `index_of` defined in this file.
+    fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize> {
+        index_of(self, &needle)
+    }
+}
+
+/// A convenience iterator for anything that satisfies AsRef<[u8]>
+/// that yields lowercase ascii bytes
+struct LowercaseIterator<'a, T: AsRef<[u8]>> {
+ // Borrowed byte source being iterated.
+ src: &'a T,
+ // Index into `src` of the byte read by the most recent `next()` call
+ // (or the starting index while `first` is still true).
+ idx: usize,
+ // True until the first `next()` call, which reads `idx` without advancing it.
+ first: bool,
+}
+
+impl<'a, T: AsRef<[u8]>> LowercaseIterator<'a, T> {
+    /// Create an iterator positioned at the first byte of `src`.
+    fn new(src: &'a T) -> Self {
+        Self {
+            first: true,
+            idx: 0,
+            src,
+        }
+    }
+}
+
+impl<T: AsRef<[u8]>> Iterator for LowercaseIterator<'_, T> {
+    type Item = u8;
+
+    /// Yield the next byte of the source, lowercased, or `None` at the end.
+    fn next(&mut self) -> Option<Self::Item> {
+        // The very first call reads the starting index; every later call advances by one.
+        if self.first {
+            self.first = false;
+        } else {
+            self.idx += 1;
+        }
+        self.src
+            .as_ref()
+            .get(self.idx)
+            .map(|c| c.to_ascii_lowercase())
+    }
+}
+
+impl<T: AsRef<[u8]>> SubIterator for LowercaseIterator<'_, T> {
+    /// New iterator over the same source whose first `next()` re-reads the current index.
+    fn subiter(&self) -> Self {
+        LowercaseIterator {
+            src: self.src,
+            idx: self.idx,
+            first: true,
+        }
+    }
+
+    /// Current index into the source bytes.
+    fn index(&self) -> usize {
+        self.idx
+    }
+
+    /// Delegates to the free function `index_of` defined in this file.
+    fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize> {
+        index_of(self, &needle)
+    }
+}
+
+// Unit tests for the comparison, append and search helpers on Bstr.
+#[cfg(test)]
+mod tests {
+ use crate::bstr::*;
+ use core::cmp::Ordering;
+ use rstest::rstest;
+
+ // cmp_slice is a plain byte-wise, case-sensitive ordering.
+ #[test]
+ fn Compare() {
+ let b = Bstr::from("ABCDefgh");
+ // direct equality
+ assert_eq!(Ordering::Equal, b.cmp_slice("ABCDefgh"));
+ // case sensitive
+ assert_ne!(Ordering::Equal, b.cmp_slice("abcdefgh"));
+ // src shorter than dst
+ assert_eq!(Ordering::Less, b.cmp_slice("ABCDefghi"));
+ // src longer than dst
+ assert_eq!(Ordering::Greater, b.cmp_slice("ABCDefg"));
+ // case less
+ assert_eq!(Ordering::Less, b.cmp_slice("abcdefgh"));
+ // case greater
+ assert_eq!(Ordering::Greater, b.cmp_slice("ABCDEFGH"));
+ }
+
+ // cmp_nocase ignores ascii case but still orders by length on a common prefix.
+ #[test]
+ fn CompareNocase() {
+ let b = Bstr::from("ABCDefgh");
+ assert_eq!(Ordering::Equal, b.cmp_nocase("ABCDefgh"));
+ assert_eq!(Ordering::Equal, b.cmp_nocase("abcdefgh"));
+ assert_eq!(Ordering::Equal, b.cmp_nocase("ABCDEFGH"));
+ assert_eq!(Ordering::Less, b.cmp_nocase("ABCDefghi"));
+ assert_eq!(Ordering::Greater, b.cmp_nocase("ABCDefg"));
+ }
+
+ #[test]
+ fn CompareNocaseNozero() {
+ // nocase_nozero only applies to the source string. The caller
+ // is not expected to pass in a search string with nulls in it.
+ let b = Bstr::from("A\x00B\x00\x00C\x00Defg\x00h");
+ assert_eq!(Ordering::Equal, b.cmp_nocase_nozero("ABCDefgh"));
+ assert_eq!(Ordering::Equal, b.cmp_nocase_nozero("abcdefgh"));
+ assert_eq!(Ordering::Equal, b.cmp_nocase_nozero("ABCDEFGH"));
+ assert_eq!(Ordering::Less, b.cmp_nocase_nozero("ABCDefghi"));
+ assert_eq!(Ordering::Greater, b.cmp_nocase_nozero("ABCDefg"));
+ }
+
+ // Each case is (initial contents, bytes to append, expected contents).
+ #[rstest]
+ #[case("abc", "defgh", "abcdefgh")]
+ #[case("ABC", "DEFGH", "ABCDEFGH")]
+ #[case("aBc", "Defgh", "aBcDefgh")]
+ #[case(
+ "TestLongerDataBc",
+ "Defghikjlmnopqrstuvwxyz",
+ "TestLongerDataBcDefghikjlmnopqrstuvwxyz"
+ )]
+ fn test_add(#[case] input: &str, #[case] input_add: &str, #[case] expected: &str) {
+ let mut b = Bstr::from(input);
+ b.add(input_add);
+ assert_eq!(b.cmp_slice(expected), Ordering::Equal);
+ }
+
+ // Each case is (capacity, first chunk, second chunk, expected contents); bytes
+ // that do not fit into the requested capacity are expected to be dropped.
+ #[rstest]
+ #[case(10, "abcd", "efghij", "abcdefghij")]
+ #[case(5, "ABcd", "efgh", "ABcde")]
+ #[case(4, "AbCd", "EFGH", "AbCd")]
+ #[case(20, "abcd", "efGHij", "abcdefGHij")]
+ fn test_add_no_ex(
+ #[case] capacity: usize, #[case] input: &str, #[case] input_add: &str,
+ #[case] expected: &str,
+ ) {
+ let mut b = Bstr::with_capacity(capacity);
+ b.add_noex(input);
+ b.add_noex(input_add);
+ assert_eq!(b.cmp_slice(expected), Ordering::Equal);
+ }
+
+ // starts_with is case sensitive.
+ #[test]
+ fn StartsWith() {
+ let b = Bstr::from("ABCD");
+ assert!(b.starts_with("AB"));
+ assert!(!b.starts_with("ab"));
+ assert!(!b.starts_with("Ab"));
+ assert!(!b.starts_with("aB"));
+ assert!(!b.starts_with("CD"));
+ }
+
+ // starts_with_nocase accepts any ascii casing of the prefix.
+ #[test]
+ fn StartsWithNocase() {
+ let b = Bstr::from("ABCD");
+ assert!(b.starts_with_nocase("AB"));
+ assert!(b.starts_with_nocase("ab"));
+ assert!(b.starts_with_nocase("Ab"));
+ assert!(b.starts_with_nocase("aB"));
+ assert!(!b.starts_with_nocase("CD"));
+ }
+
+ // index_of is case sensitive and returns the offset of the first match.
+ #[test]
+ fn IndexOf() {
+ let b = Bstr::from("ABCDefgh");
+ assert_eq!(Some(4), b.index_of("e"));
+ assert_eq!(Some(0), b.index_of("A"));
+ assert_eq!(Some(7), b.index_of("h"));
+ assert_eq!(Some(3), b.index_of("De"));
+ assert_eq!(None, b.index_of("z"));
+ assert_eq!(None, b.index_of("a"));
+ assert_eq!(None, b.index_of("hi"));
+ }
+
+ #[test]
+ fn IndexOfNocase() {
+ let b = Bstr::from("ABCDefgh");
+ assert_eq!(Some(4), b.index_of_nocase("E"));
+ assert_eq!(Some(0), b.index_of_nocase("a"));
+ assert_eq!(Some(0), b.index_of_nocase("A"));
+ assert_eq!(Some(7), b.index_of_nocase("H"));
+ assert_eq!(Some(3), b.index_of_nocase("dE"));
+ assert_eq!(None, b.index_of_nocase("z"));
+ assert_eq!(None, b.index_of_nocase("Hi"));
+ }
+
+ // Expected indices are offsets into the raw bytes, i.e. they include the
+ // zero bytes that the search skips over.
+ #[test]
+ fn IndexOfNocaseNozero() {
+ let b = Bstr::from("A\x00B\x00\x00C\x00Defg\x00h");
+ assert_eq!(Some(8), b.index_of_nocase_nozero("E"));
+ assert_eq!(Some(0), b.index_of_nocase_nozero("a"));
+ assert_eq!(Some(0), b.index_of_nocase_nozero("A"));
+ assert_eq!(Some(12), b.index_of_nocase_nozero("H"));
+ assert_eq!(Some(7), b.index_of_nocase_nozero("dE"));
+ assert_eq!(Some(2), b.index_of_nocase_nozero("bc"));
+ assert_eq!(None, b.index_of_nocase_nozero("z"));
+ assert_eq!(None, b.index_of_nocase_nozero("Hi"));
+ assert_eq!(None, b.index_of_nocase_nozero("ghi"));
+ }
+}
diff --git a/rust/htp/src/c_api/bstr.rs b/rust/htp/src/c_api/bstr.rs
new file mode 100644
index 000000000000..d1f55288adeb
--- /dev/null
+++ b/rust/htp/src/c_api/bstr.rs
@@ -0,0 +1,201 @@
+use crate::bstr::Bstr;
+use core::cmp::Ordering;
+use std::{boxed::Box, ffi::CStr};
+
+/// Allocate a zero-length bstring, reserving space for at least len bytes.
+/// The returned pointer owns the bstring and must be released with bstr_free.
+#[no_mangle]
+pub extern "C" fn bstr_alloc(len: libc::size_t) -> *mut Bstr {
+    Box::into_raw(Box::new(Bstr::with_capacity(len)))
+}
+
+/// Deallocate the supplied bstring instance. Allows NULL on input.
+/// # Safety
+/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn bstr_free(b: *mut Bstr) {
+    // NULL is accepted and ignored.
+    if b.is_null() {
+        return;
+    }
+    // Re-box the pointer so the bstring is dropped and its memory released.
+    let owned = Box::from_raw(b);
+    drop(owned);
+}
+
+/// Return the length of the string, in bytes
+/// # Safety
+/// x must be properly initialized: not NULL, dangling, or misaligned
+#[no_mangle]
+pub unsafe extern "C" fn bstr_len(x: *const Bstr) -> libc::size_t {
+ (*x).len()
+}
+
+/// Return a pointer to the bstr payload
+///
+/// The pointer is obtained from a shared borrow of the bstring and then cast to
+/// a mutable pointer. NOTE(review): callers presumably must treat the data as
+/// read-only and must not use the pointer after the bstring is modified or freed — confirm.
+/// # Safety
+/// x must be properly initialized: not NULL, dangling, or misaligned
+#[no_mangle]
+pub unsafe extern "C" fn bstr_ptr(x: *const Bstr) -> *mut libc::c_uchar {
+ (*x).as_ptr() as *mut u8
+}
+
+/// Return the capacity of the string, i.e. how many bytes it can hold without growing
+/// # Safety
+/// x must be properly initialized: not NULL, dangling, or misaligned
+#[no_mangle]
+pub unsafe extern "C" fn bstr_size(x: *const Bstr) -> libc::size_t {
+ (*x).capacity()
+}
+
+/// Case-sensitive comparison of a bstring and a NUL-terminated string.
+/// returns -1 if b is less than c
+/// 0 if b is equal to c
+/// 1 if b is greater than c
+/// # Safety
+/// b and c must be properly initialized: not NULL, dangling, or misaligned.
+/// c must point to memory that contains a valid nul terminator byte at the end of the string
+#[no_mangle]
+pub unsafe extern "C" fn bstr_cmp_c(b: *const Bstr, c: *const libc::c_char) -> libc::c_int {
+    let rhs = CStr::from_ptr(c).to_bytes();
+    // Ordering's discriminants are Less = -1, Equal = 0, Greater = 1, so the
+    // cast produces exactly the documented return values.
+    (*b).cmp_slice(rhs) as libc::c_int
+}
+
+/// Case-insensitive comparison of a bstring and a NUL-terminated string.
+/// returns -1 if b is less than c
+/// 0 if b is equal to c
+/// 1 if b is greater than c
+/// # Safety
+/// b and c must be properly initialized: not NULL, dangling, or misaligned.
+/// c must point to memory that contains a valid nul terminator byte at the end of the string
+#[no_mangle]
+pub unsafe extern "C" fn bstr_cmp_c_nocase(b: *const Bstr, c: *const libc::c_char) -> libc::c_int {
+    let rhs = CStr::from_ptr(c).to_bytes();
+    // Ordering's discriminants are Less = -1, Equal = 0, Greater = 1, so the
+    // cast produces exactly the documented return values.
+    (*b).cmp_nocase(rhs) as libc::c_int
+}
+
+/// Create a new bstring by copying the provided NUL-terminated string
+/// (the terminating NUL itself is not copied).
+/// # Safety
+/// cstr must be properly initialized: not NULL, dangling, or misaligned.
+/// cstr must point to memory that contains a valid nul terminator byte at the end of the string
+#[no_mangle]
+pub unsafe extern "C" fn bstr_dup_c(cstr: *const libc::c_char) -> *mut Bstr {
+    let bytes = CStr::from_ptr(cstr).to_bytes();
+    // Reserve exactly the needed space up front, then copy the bytes in.
+    let mut copy = Bstr::with_capacity(bytes.len());
+    copy.add(bytes);
+    Box::into_raw(Box::new(copy))
+}
+
+/// Create a new NUL-terminated string out of the provided bstring. If NUL bytes
+/// are contained in the bstring, each will be replaced with "\0" (two characters).
+/// The caller is responsible to keep track of the allocated memory area and free
+/// it once it is no longer needed. The memory is obtained from malloc(), so it
+/// must be released with free().
+/// returns The newly created NUL-terminated string, or NULL if b is NULL or in
+/// case of memory allocation failure.
+/// # Safety
+/// b must be NULL or properly initialized, and not dangling nor misaligned.
+#[no_mangle]
+pub unsafe extern "C" fn bstr_util_strdup_to_c(b: *const Bstr) -> *mut libc::c_char {
+    if b.is_null() {
+        return std::ptr::null_mut();
+    }
+    let bstr = &*b;
+    let src: &[u8] = bstr.as_slice();
+
+    // Since the memory returned here is just a char* and the caller will
+    // free() it we have to use malloc() here.
+    // Every embedded NUL takes one extra byte ("\0"), plus one byte for the
+    // trailing NUL terminator.
+    let embedded_nuls = src.iter().filter(|&&byte| byte == 0).count();
+    let newlen = src.len() + embedded_nuls + 1;
+    // Use libc::c_char instead of i8: c_char is unsigned on some targets, where
+    // an i8 pointer would not match the declared return type.
+    let mem = libc::malloc(newlen) as *mut libc::c_char;
+    if mem.is_null() {
+        return std::ptr::null_mut();
+    }
+    // Write through the raw pointer so that no reference to the still
+    // uninitialized buffer is ever created.
+    let mut dst_idx = 0;
+    for &byte in src {
+        if byte == 0 {
+            mem.add(dst_idx).write(b'\\' as libc::c_char);
+            dst_idx += 1;
+            mem.add(dst_idx).write(b'0' as libc::c_char);
+        } else {
+            mem.add(dst_idx).write(byte as libc::c_char);
+        }
+        dst_idx += 1;
+    }
+    mem.add(dst_idx).write(0);
+
+    mem
+}
+
+// Unit tests for the C API wrappers around Bstr.
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::ffi::CString;
+
+ // Build an owned, NUL-terminated CString from a string literal.
+ macro_rules! cstr {
+ ( $x:expr ) => {{
+ CString::new($x).unwrap()
+ }};
+ }
+
+ // A freshly allocated bstring has the requested capacity and zero length.
+ #[test]
+ fn Bstr_Alloc() {
+ unsafe {
+ let p1 = bstr_alloc(10);
+ assert_eq!(10, bstr_size(p1));
+ assert_eq!(0, bstr_len(p1));
+ bstr_free(p1);
+ }
+ }
+
+ // Duplicating a C string copies its bytes without the NUL terminator.
+ #[test]
+ fn Bstr_DupC() {
+ unsafe {
+ let p1 = bstr_dup_c(cstr!("arfarf").as_ptr());
+
+ assert_eq!(6, bstr_size(p1));
+ assert_eq!(6, bstr_len(p1));
+ assert_eq!(
+ 0,
+ libc::memcmp(
+ cstr!("arfarf").as_ptr() as *const core::ffi::c_void,
+ bstr_ptr(p1) as *const core::ffi::c_void,
+ 6
+ )
+ );
+ bstr_free(p1);
+ }
+ }
+
+ // An embedded NUL byte must come out as the two characters "\0".
+ #[test]
+ fn Bstr_UtilDupToC() {
+ unsafe {
+ let s = Bstr::from(b"ABCDEFGHIJKL\x00NOPQRST" as &[u8]);
+ let c = bstr_util_strdup_to_c(&s);
+ let e = CString::new("ABCDEFGHIJKL\\0NOPQRST").unwrap();
+ assert_eq!(0, libc::strcmp(e.as_ptr(), c));
+
+ // The string was allocated with malloc(), so release it with free().
+ libc::free(c as *mut core::ffi::c_void);
+ }
+ }
+
+ // bstr_cmp_c returns 0, -1 or 1 for equal, less and greater respectively.
+ #[test]
+ fn Bstr_CmpC() {
+ unsafe {
+ let p1 = Bstr::from("arfarf");
+ assert_eq!(0, bstr_cmp_c(&p1, cstr!("arfarf").as_ptr()));
+ assert_eq!(-1, bstr_cmp_c(&p1, cstr!("arfarf2").as_ptr()));
+ assert_eq!(1, bstr_cmp_c(&p1, cstr!("arf").as_ptr()));
+ assert_eq!(-1, bstr_cmp_c(&p1, cstr!("not equal").as_ptr()));
+ }
+ }
+}
diff --git a/rust/htp/src/c_api/config.rs b/rust/htp/src/c_api/config.rs
new file mode 100644
index 000000000000..a9d1af3ed6cd
--- /dev/null
+++ b/rust/htp/src/c_api/config.rs
@@ -0,0 +1,596 @@
+#![deny(missing_docs)]
+use crate::{
+ config::{Config, HtpServerPersonality, HtpUrlEncodingHandling},
+ hook::{DataExternalCallbackFn, LogExternalCallbackFn, TxExternalCallbackFn},
+ HtpStatus,
+};
+use std::convert::TryInto;
+
+/// Creates a new configuration structure holding the default settings.
+///
+/// Configuration structures created at configuration time must not be changed
+/// afterwards in order to support lock-less copying. The returned pointer is a
+/// leaked `Box`; `htp_config_destroy` rebuilds that `Box` and drops it.
+#[no_mangle]
+pub extern "C" fn htp_config_create() -> *mut Config {
+    Box::into_raw(Box::new(Config::default()))
+}
+
+/// Destroys a configuration structure by rebuilding the `Box` behind `cfg` and dropping it; a NULL `cfg` is ignored.
+/// # Safety
+/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_destroy(cfg: *mut Config) {
+ if !cfg.is_null() {
+ drop(Box::from_raw(cfg));
+ }
+}
+
+/// Registers `cbk_fn` via `hook_log.register_extern`. The callback is invoked every time there
+/// is a log message with severity equal and higher than the configured log level.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_log(cfg: *mut Config, cbk_fn: LogExternalCallbackFn) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_log.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_BODY_DATA callback by passing it to `hook_request_body_data.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_body_data(
+ cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_body_data.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_COMPLETE callback by passing it to `hook_request_complete.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_complete(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_complete.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_HEADERS callback by passing it to `hook_request_headers.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_headers(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_headers.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_HEADER_DATA callback by passing it to `hook_request_header_data.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_header_data(
+ cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_header_data.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_LINE callback by passing it to `hook_request_line.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_line(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_line.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_START callback (via `hook_request_start.register_extern`), which
+/// is invoked every time a new request begins and before any parsing is done.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_start(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_start.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_TRAILER callback by passing it to `hook_request_trailer.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_trailer(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_trailer.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a REQUEST_TRAILER_DATA callback by passing it to `hook_request_trailer_data.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_trailer_data(
+ cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_request_trailer_data.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_BODY_DATA callback by passing it to `hook_response_body_data.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_body_data(
+ cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_body_data.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_COMPLETE callback by passing it to `hook_response_complete.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_complete(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_complete.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_HEADERS callback by passing it to `hook_response_headers.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_headers(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_headers.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_HEADER_DATA callback by passing it to `hook_response_header_data.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_header_data(
+ cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_header_data.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_START callback by passing it to `hook_response_start.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_start(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_start.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_TRAILER callback by passing it to `hook_response_trailer.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_trailer(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_trailer.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a RESPONSE_TRAILER_DATA callback by passing it to `hook_response_trailer_data.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_trailer_data(
+ cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_response_trailer_data.register_extern(cbk_fn)
+ }
+}
+
+/// Registers `cbk_fn` as a TRANSACTION_COMPLETE callback by passing it to `hook_transaction_complete.register_extern`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_transaction_complete(
+ cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.hook_transaction_complete.register_extern(cbk_fn)
+ }
+}
+
+/// Configures whether backslash characters are treated as path segment separators. They
+/// are not on Unix systems, but are on Windows systems. If this setting is enabled, a path
+/// such as "/one\two/three" will be converted to "/one/two/three". Only `enabled == 1` turns it on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_backslash_convert_slashes(
+ cfg: *mut Config, enabled: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_backslash_convert_slashes(enabled == 1)
+ }
+}
+
+/// Sets the replacement byte that will be used in the lossy best-fit mapping from
+/// multi-byte to single-byte streams. The question mark character is used as the
+/// default replacement byte. `b` is narrowed with `as u8`, so only its low 8 bits are kept.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_bestfit_replacement_byte(cfg: *mut Config, b: libc::c_int) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_bestfit_replacement_byte(b as u8)
+ }
+}
+
+/// Configures the maximum compression bomb size LibHTP will decompress; a `bomblimit` that fails the `try_into` conversion is silently ignored.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_compression_bomb_limit(
+ cfg: *mut Config, bomblimit: libc::size_t,
+) {
+ if let Ok(bomblimit) = bomblimit.try_into() {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.compression_options.set_bomb_limit(bomblimit)
+ }
+ }
+}
+
+/// Configures the maximum compression time LibHTP will allow by forwarding `timelimit` to `compression_options.set_time_limit`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_compression_time_limit(
+ cfg: *mut Config, timelimit: libc::c_uint,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.compression_options.set_time_limit(timelimit)
+ }
+}
+
+/// Configures whether input data will be converted to lowercase. Useful for handling servers with
+/// case-insensitive filesystems. Only `enabled == 1` turns the option on; any other value turns it off.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_convert_lowercase(cfg: *mut Config, enabled: libc::c_int) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_convert_lowercase(enabled == 1)
+ }
+}
+
+/// Configures the maximum size of the buffer LibHTP will use when all data is not available
+/// in the current buffer (e.g., a very long header line that might span several packets).
+/// `field_limit` is forwarded unchanged to the config's `set_field_limit`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_field_limit(cfg: *mut Config, field_limit: libc::size_t) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_field_limit(field_limit)
+ }
+}
+
+/// Configures the maximum memlimit LibHTP will pass to liblzma by forwarding `memlimit` to `compression_options.set_lzma_memlimit`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_lzma_memlimit(cfg: *mut Config, memlimit: libc::size_t) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.compression_options.set_lzma_memlimit(memlimit)
+ }
+}
+
+/// Configures the maximum number of lzma layers to pass to the decompressor.
+///
+/// A `limit` of zero or less is forwarded as `None`; so is a value the conversion rejects.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_lzma_layers(cfg: *mut Config, limit: libc::c_int) {
+    // Computed up front: the conversion is pure and does not depend on `cfg`.
+    let layers = if limit > 0 { limit.try_into().ok() } else { None };
+    if let Some(config) = cfg.as_mut() {
+        config.compression_options.set_lzma_layers(layers)
+    }
+}
+
+/// Configures the maximum number of live transactions per connection by writing `limit` straight into `cfg.max_tx`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_max_tx(cfg: *mut Config, limit: u32) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.max_tx = limit;
+ }
+}
+
+/// Configures the maximum number of headers in one transaction by writing `limit` straight into `cfg.number_headers_limit`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_number_headers_limit(cfg: *mut Config, limit: u32) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.number_headers_limit = limit;
+ }
+}
+
+/// Configures how the server reacts to encoded NUL bytes. Some servers will stop
+/// at NUL, while some will respond with 400 or 404. When the termination option is not
+/// used, the NUL byte will remain in the path. Only `enabled == 1` turns the option on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_nul_encoded_terminates(
+ cfg: *mut Config, enabled: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_nul_encoded_terminates(enabled == 1)
+ }
+}
+
+/// Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings. Only `enabled == 1` turns the option on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_nul_raw_terminates(cfg: *mut Config, enabled: libc::c_int) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_nul_raw_terminates(enabled == 1)
+ }
+}
+
+/// Formerly enabled or disabled request cookie parsing. The function now ignores both arguments and only exists to keep the C API.
+/// # Safety
+/// Neither argument is read or dereferenced, so any values may be passed.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_parse_request_cookies(
+ _cfg: *mut Config, _parse_request_cookies: libc::c_int,
+) {
+ // do nothing, but keep API
+}
+
+/// Configures whether consecutive path segment separators will be compressed. When enabled, a path
+/// such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator
+/// decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four"
+/// will be converted to "/one/two/three/four" (assuming all 3 options are enabled). Only `enabled == 1` turns it on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_path_separators_compress(
+ cfg: *mut Config, enabled: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_path_separators_compress(enabled == 1)
+ }
+}
+
+/// Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This
+/// is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding
+/// is taking place. Only `enabled == 1` turns the option on; any other value turns it off.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_plusspace_decode(cfg: *mut Config, enabled: libc::c_int) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_plusspace_decode(enabled == 1)
+ }
+}
+
+/// Configures whether encoded path segment separators will be decoded. Apache does not do
+/// this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized
+/// to "/one/two". If the backslash_separators option is also enabled, encoded backslash
+/// characters will be converted too (and subsequently normalized to forward slashes). Only `enabled == 1` turns it on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_path_separators_decode(
+ cfg: *mut Config, enabled: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_path_separators_decode(enabled == 1)
+ }
+}
+
+/// Configures whether request data is decompressed. Only `enabled == 1` turns the option on; any other value turns it off.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_request_decompression(
+ cfg: *mut Config, enabled: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_request_decompression(enabled == 1)
+ }
+}
+
+/// Configures how many layers of compression we try to decompress.
+///
+/// A `limit` of zero or less is forwarded as `None`; so is a value the conversion rejects.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_decompression_layer_limit(
+    cfg: *mut Config, limit: libc::c_int,
+) {
+    // Computed up front: the conversion is pure and does not depend on `cfg`.
+    let layers = if limit > 0 { limit.try_into().ok() } else { None };
+    if let Some(config) = cfg.as_mut() {
+        config.set_decompression_layer_limit(layers)
+    }
+}
+
+/// Enable or disable allowing spaces in URIs. Disabled by default. `allow_space` is forwarded unchanged to `set_allow_space_uri`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_allow_space_uri(cfg: *mut Config, allow_space: bool) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_allow_space_uri(allow_space)
+ }
+}
+
+/// Configure desired server personality; returns `HtpStatus::ERROR` when `cfg` is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_server_personality(
+    cfg: *mut Config, personality: HtpServerPersonality,
+) -> HtpStatus {
+    cfg.as_mut().map_or(HtpStatus::ERROR, |config| {
+        config.set_server_personality(personality).into()
+    })
+}
+
+/// Configures whether %u-encoded sequences are decoded. Such sequences will be treated as
+/// invalid URL encoding if decoding is not desirable. Only `enabled == 1` turns the option on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_u_encoding_decode(cfg: *mut Config, enabled: libc::c_int) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_u_encoding_decode(enabled == 1)
+ }
+}
+
+/// Configures how the server handles invalid URL encoding by forwarding `handling` to `set_url_encoding_invalid_handling`.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_url_encoding_invalid_handling(
+ cfg: *mut Config, handling: HtpUrlEncodingHandling,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_url_encoding_invalid_handling(handling)
+ }
+}
+
+/// Controls whether the data should be treated as UTF-8 and converted to a single-byte
+/// stream using best-fit mapping. Only `enabled == 1` turns the option on; any other value turns it off.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_utf8_convert_bestfit(
+ cfg: *mut Config, enabled: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_utf8_convert_bestfit(enabled == 1)
+ }
+}
+
+/// Configures whether to attempt to decode a double encoded query in the normalized uri; `set` is forwarded unchanged.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_double_decode_normalized_query(
+ cfg: *mut Config, set: bool,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_double_decode_normalized_query(set)
+ }
+}
+
+/// Configures whether to attempt to decode a double encoded path in the normalized uri; `set` is forwarded unchanged.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_double_decode_normalized_path(cfg: *mut Config, set: bool) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_double_decode_normalized_path(set)
+ }
+}
+
+/// Configures whether to normalize URIs into a complete or partial form.
+/// Pass `true` to use the complete normalized URI or `false` to use partials; `set` is forwarded unchanged.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_normalized_uri_include_all(cfg: *mut Config, set: bool) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_normalized_uri_include_all(set)
+ }
+}
+
+/// Configures whether transactions will be automatically destroyed once they
+/// are processed and all callbacks invoked. This option is appropriate for
+/// programs that handle transactions as soon as they are parsed. Only `tx_auto_destroy == 1` turns it on.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_tx_auto_destroy(
+ cfg: *mut Config, tx_auto_destroy: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_tx_auto_destroy(tx_auto_destroy == 1)
+ }
+}
+
+/// Configures whether incomplete transactions will be flushed when a connection is closed.
+///
+/// This will invoke the transaction complete callback for each incomplete transaction. The
+/// transactions passed to the callback will not have their request and response state set
+/// to complete - they will simply be passed with the state they have within the parser at
+/// the time of the call.
+///
+/// This option is intended to be used when a connection is closing and we want to process
+/// any incomplete transactions that were in flight, or which never completed due to packet
+/// loss or parsing errors.
+///
+/// These transactions will also be removed from the parser when auto destroy is enabled. Only `flush_incomplete == 1` turns the option on.
+///
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_flush_incomplete(
+ cfg: *mut Config, flush_incomplete: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_flush_incomplete(flush_incomplete == 1)
+ }
+}
+
+/// Enable or disable the built-in Urlencoded parser. Disabled by default. Only `parse_urlencoded == 1` enables it.
+/// The parser will parse query strings and request bodies with the appropriate MIME type.
+/// # Safety
+/// `cfg` must be NULL (the call is then a no-op) or point to a valid `Config` that nothing else accesses during the call.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_parse_urlencoded(
+ cfg: *mut Config, parse_urlencoded: libc::c_int,
+) {
+ if let Some(cfg) = cfg.as_mut() {
+ cfg.set_parse_urlencoded(parse_urlencoded == 1)
+ }
+}
diff --git a/rust/htp/src/c_api/connection.rs b/rust/htp/src/c_api/connection.rs
new file mode 100644
index 000000000000..d2544101e3ca
--- /dev/null
+++ b/rust/htp/src/c_api/connection.rs
@@ -0,0 +1,36 @@
+#![deny(missing_docs)]
+use crate::{connection::Connection, log::Log};
+
+/// Returns the connection's `request_data_counter`.
+///
+/// A NULL `conn` yields 0.
+/// # Safety
+/// When calling this method, you have to ensure that conn is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_conn_request_data_counter(conn: *const Connection) -> u64 {
+    conn.as_ref().map_or(0, |c| c.request_data_counter)
+}
+
+/// Returns the connection's `response_data_counter`.
+///
+/// A NULL `conn` yields 0.
+/// # Safety
+/// When calling this method, you have to ensure that conn is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_conn_response_data_counter(conn: *const Connection) -> u64 {
+    conn.as_ref().map_or(0, |c| c.response_data_counter)
+}
+
+/// Get the next logged message from the connection
+///
+/// Returns the next log moved into a fresh `Box`, or NULL when `conn` is NULL or no log is pending.
+/// The caller must free this result with htp_log_free
+/// # Safety
+/// When calling this method, you have to ensure that conn is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_conn_next_log(conn: *const Connection) -> *mut Log {
+    match conn.as_ref().and_then(|conn| conn.get_next_log()) {
+        Some(log) => Box::into_raw(Box::new(log)),
+        None => std::ptr::null_mut(),
+    }
+}
diff --git a/rust/htp/src/c_api/connection_parser.rs b/rust/htp/src/c_api/connection_parser.rs
new file mode 100644
index 000000000000..98ef752d4d12
--- /dev/null
+++ b/rust/htp/src/c_api/connection_parser.rs
@@ -0,0 +1,310 @@
+#![deny(missing_docs)]
+use crate::{
+ config::Config,
+ connection::Connection,
+ connection_parser::{ConnectionParser, HtpStreamState, ParserData},
+ transaction::Transaction,
+};
+use std::{
+ convert::{TryFrom, TryInto},
+ ffi::CStr,
+};
+use time::{Duration, OffsetDateTime};
+
+/// Converts seconds and microseconds since the Unix epoch into an `OffsetDateTime`, or `None` when out of range.
+fn datetime_from_sec_usec(sec: i64, usec: i64) -> Option<OffsetDateTime> {
+    OffsetDateTime::from_unix_timestamp(sec)
+        .ok()
+        // `date + duration` panics on overflow, which must not happen under the C API.
+        .and_then(|date| date.checked_add(Duration::microseconds(usec)))
+}
+
+/// Closes the connection associated with the supplied parser.
+///
+/// `timestamp` is optional: a NULL pointer, or a value that cannot be converted,
+/// is forwarded as `None`. A NULL `connp` makes the call a no-op.
+/// # Safety
+/// `connp` must be NULL or a properly initialized parser that nothing else accesses during
+/// the call; `timestamp` must be NULL or point to a readable `timeval`.
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_close(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval,
+) {
+    if let Some(parser) = connp.as_mut() {
+        let closed_at = timestamp
+            .as_ref()
+            .and_then(|tv| datetime_from_sec_usec(tv.tv_sec.into(), tv.tv_usec.into()));
+        parser.close(closed_at)
+    }
+}
+
+/// Creates a new connection parser using the provided configuration or a default configuration if NULL provided.
+/// Note the provided config will be copied into the created connection parser. Therefore, subsequent modification
+/// to the original config will have no effect.
+///
+/// Returns a new connection parser instance as a leaked `Box`; `htp_connp_destroy_all` rebuilds and drops it.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_create(cfg: *mut Config) -> *mut ConnectionParser {
+ Box::into_raw(Box::new(ConnectionParser::new(
+ cfg.as_ref().cloned().unwrap_or_default(),
+ )))
+}
+
+/// Destroys the connection parser, its data structures, the connection and its transactions.
+/// # Safety
+/// connp must be NULL (no-op) or a pointer from htp_connp_create that has not been destroyed yet.
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_destroy_all(connp: *mut ConnectionParser) {
+    // Box::from_raw on NULL is undefined behaviour; skip it so NULL stays a no-op.
+    drop((!connp.is_null()).then(|| Box::from_raw(connp)));
+}
+
+/// Returns the connection associated with the connection parser.
+///
+/// Returns a pointer to the parser's `conn` field, or NULL when `connp` is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_connection(connp: *const ConnectionParser) -> *const Connection {
+    match connp.as_ref() {
+        Some(parser) => &parser.conn as *const Connection,
+        None => std::ptr::null(),
+    }
+}
+
+/// Retrieve the user data associated with this connection parser.
+///
+/// Returns user data, or NULL if there isn't any or if `connp` itself is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_user_data(connp: *const ConnectionParser) -> *mut libc::c_void {
+    let stored = connp
+        .as_ref()
+        .and_then(|parser| parser.user_data::<*mut libc::c_void>());
+    stored.map_or(std::ptr::null_mut(), |pointer| *pointer)
+}
+
+/// Associate user data with the supplied parser; the raw pointer is boxed and handed to the parser's `set_user_data`.
+/// # Safety
+/// `connp` must be NULL (no-op) or a properly initialized parser. `user_data` is only stored here, not dereferenced.
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_set_user_data(
+ connp: *mut ConnectionParser, user_data: *mut libc::c_void,
+) {
+ if let Some(connp) = connp.as_mut() {
+ connp.set_user_data(Box::new(user_data))
+ }
+}
+
+/// Opens connection.
+///
+/// Every argument except `connp` is optional: an address that is NULL, not valid UTF-8 or
+/// not parseable, a port that fails the integer conversion, and a NULL or unconvertible
+/// `timestamp` are each forwarded to the parser as `None`.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_open(
+    connp: *mut ConnectionParser, client_addr: *const libc::c_char, client_port: libc::c_int,
+    server_addr: *const libc::c_char, server_port: libc::c_int, timestamp: *const libc::timeval,
+) {
+    if let Some(parser) = connp.as_mut() {
+        let client_ip = client_addr
+            .as_ref()
+            .and_then(|addr| CStr::from_ptr(addr).to_str().ok())
+            .and_then(|text| text.parse().ok());
+        let server_ip = server_addr
+            .as_ref()
+            .and_then(|addr| CStr::from_ptr(addr).to_str().ok())
+            .and_then(|text| text.parse().ok());
+        let opened_at = timestamp
+            .as_ref()
+            .and_then(|tv| datetime_from_sec_usec(tv.tv_sec.into(), tv.tv_usec.into()));
+        parser.open(
+            client_ip,
+            client_port.try_into().ok(),
+            server_ip,
+            server_port.try_into().ok(),
+            opened_at,
+        )
+    }
+}
+
+/// Forwards to the parser's `request_close` with the optional timestamp.
+///
+/// `timestamp` is optional: a NULL pointer, or a value that cannot be converted,
+/// is forwarded as `None`. A NULL `connp` makes the call a no-op.
+/// # Safety
+/// `connp` must be NULL or a properly initialized parser that nothing else accesses during
+/// the call; `timestamp` must be NULL or point to a readable `timeval`.
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_request_close(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval,
+) {
+    if let Some(parser) = connp.as_mut() {
+        let closed_at = timestamp
+            .as_ref()
+            .and_then(|tv| datetime_from_sec_usec(tv.tv_sec.into(), tv.tv_usec.into()));
+        parser.request_close(closed_at)
+    }
+}
+
+/// Process a chunk of inbound client request data
+///
+/// timestamp is optional; a NULL or unconvertible value is forwarded as `None`.
+/// `data` and `len` are wrapped in a `ParserData` and handed to the parser's `request_data`.
+///
+/// Returns HTP_STREAM_STATE_DATA, HTP_STREAM_STATE_ERROR or HTP_STREAM_STATE_DATA_OTHER (see QUICK_START).
+/// HTP_STREAM_STATE_CLOSED and HTP_STREAM_STATE_TUNNEL are also possible. A NULL `connp` yields the ERROR state.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_request_data(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval, data: *const libc::c_void,
+    len: libc::size_t,
+) -> HtpStreamState {
+    match connp.as_mut() {
+        Some(parser) => {
+            let chunk = ParserData::from((data as *const u8, len));
+            let seen_at = timestamp
+                .as_ref()
+                .and_then(|tv| datetime_from_sec_usec(tv.tv_sec.into(), tv.tv_usec.into()));
+            parser.request_data(chunk, seen_at)
+        }
+        None => HtpStreamState::ERROR,
+    }
+}
+
+/// Process a chunk of outbound (server or response) data.
+///
+/// timestamp is optional; a NULL or unconvertible value is forwarded as `None`.
+/// `data` and `len` are wrapped in a `ParserData` and handed to the parser's `response_data`.
+///
+/// Returns HTP_STREAM_STATE_OK on state change, HTP_STREAM_STATE_ERROR on error (including a NULL `connp`), or HTP_STREAM_STATE_DATA when more data is needed
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_response_data(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval, data: *const libc::c_void,
+    len: libc::size_t,
+) -> HtpStreamState {
+    match connp.as_mut() {
+        Some(parser) => {
+            let chunk = ParserData::from((data as *const u8, len));
+            let seen_at = timestamp
+                .as_ref()
+                .and_then(|tv| datetime_from_sec_usec(tv.tv_sec.into(), tv.tv_usec.into()));
+            parser.response_data(chunk, seen_at)
+        }
+        None => HtpStreamState::ERROR,
+    }
+}
+
+/// Get the number of transactions processed on this connection.
+///
+/// Returns the parser's `tx_size()`, or -1 when `connp` is NULL or the count does not fit an `isize`.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_tx_size(connp: *const ConnectionParser) -> isize {
+    match connp.as_ref() {
+        Some(parser) => isize::try_from(parser.tx_size()).unwrap_or(-1),
+        None => -1,
+    }
+}
+
+/// Get a transaction.
+///
+/// connp: Connection parser to query.
+/// tx_id: Identifier handed to the parser's `tx` lookup.
+///
+/// Returns the transaction, or NULL when `connp` is NULL, when the lookup finds
+/// nothing for `tx_id`, or when the transaction found has not been started yet.
+///
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_tx(
+    connp: *mut ConnectionParser, tx_id: usize,
+) -> *const Transaction {
+    let parser = match connp.as_ref() {
+        Some(parser) => parser,
+        None => return std::ptr::null(),
+    };
+    // Only transactions that have already started are handed out.
+    match parser.tx(tx_id) {
+        Some(tx) if tx.is_started() => tx as *const Transaction,
+        _ => std::ptr::null(),
+    }
+}
+
+/// Retrieves the pointer to the active response transaction. In connection
+/// parsing mode there can be many open transactions, and up to 2 active
+/// transactions at any one time. This is due to HTTP pipelining.
+///
+/// Returns NULL when `connp` is NULL or the parser's `response()` yields nothing.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_get_response_tx(
+    connp: *mut ConnectionParser,
+) -> *const Transaction {
+    match connp.as_mut().and_then(|parser| parser.response()) {
+        Some(tx) => tx as *const Transaction,
+        None => std::ptr::null(),
+    }
+}
+
+/// Retrieves the pointer to the active request transaction. In connection
+/// parsing mode there can be many open transactions, and up to 2 active
+/// transactions at any one time. This is due to HTTP pipelining.
+///
+/// Returns NULL when `connp` is NULL or the parser's `request()` yields nothing.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_get_request_tx(
+    connp: *mut ConnectionParser,
+) -> *const Transaction {
+    match connp.as_mut().and_then(|parser| parser.request()) {
+        Some(tx) => tx as *const Transaction,
+        None => std::ptr::null(),
+    }
+}
+
+/// Returns the number of bytes consumed from the current data chunks so far, or -1 when `connp` is NULL or the count does not fit an `i64`.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_request_data_consumed(connp: *const ConnectionParser) -> i64 {
+    match connp.as_ref() {
+        Some(parser) => parser.request_data_consumed().try_into().unwrap_or(-1),
+        None => -1,
+    }
+}
+
+/// Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation
+/// of htp_connp_response_data() will consume all data from the supplied buffer, but there are circumstances
+/// where only partial consumption is possible. In such cases HTP_STREAM_DATA_OTHER will be returned.
+/// Consumed bytes are no longer necessary, but the remainder of the buffer will need to be saved
+/// for later.
+/// Returns the number of bytes consumed from the last data chunk sent for outbound processing,
+/// or -1 when `connp` is NULL or the count does not fit an `i64`.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_response_data_consumed(connp: *const ConnectionParser) -> i64 {
+    match connp.as_ref() {
+        Some(parser) => parser.response_data_consumed().try_into().unwrap_or(-1),
+        None => -1,
+    }
+}
diff --git a/rust/htp/src/c_api/header.rs b/rust/htp/src/c_api/header.rs
new file mode 100644
index 000000000000..2b93eb54fea4
--- /dev/null
+++ b/rust/htp/src/c_api/header.rs
@@ -0,0 +1,189 @@
+#![deny(missing_docs)]
+use crate::{
+ bstr::Bstr,
+ c_api::bstr::bstr_ptr,
+ transaction::{Header, Headers},
+};
+use std::convert::TryFrom;
+
+/// Look up the first header matching the key.
+///
+/// headers: Header table.
+/// ckey: Header name to match, as a NUL-terminated C string.
+///
+/// Returns the header, or NULL when no header matches or either argument is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that headers is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_get(
+    headers: *const Headers, ckey: *const libc::c_char,
+) -> *const Header {
+    let (table, key) = match (headers.as_ref(), ckey.is_null()) {
+        (Some(table), false) => (table, std::ffi::CStr::from_ptr(ckey).to_bytes()),
+        _ => return std::ptr::null(),
+    };
+    match table.get_nocase_nozero(key) {
+        Some(header) => header as *const Header,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get all headers flags
+///
+/// headers: Header table.
+///
+/// Returns the bitwise OR of the flags of every header in the table,
+/// or 0 if headers is NULL.
+///
+/// # Safety
+/// When calling this method, you have to ensure that headers is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_flags(headers: *const Headers) -> u64 {
+    let table = match headers.as_ref() {
+        Some(table) => table,
+        None => return 0,
+    };
+    // Accumulate every header's flags into a single value.
+    table.into_iter().fold(0, |acc, header| acc | header.flags)
+}
+
+/// Get the header at a given index.
+///
+/// headers: Header table.
+/// index: Index into the table.
+///
+/// Returns the header, or NULL when headers is NULL or index is out of range.
+/// # Safety
+/// When calling this method, you have to ensure that headers is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_get_index(
+    headers: *const Headers, index: usize,
+) -> *const Header {
+    let table = match headers.as_ref() {
+        Some(table) => table,
+        None => return std::ptr::null(),
+    };
+    // An index past the end of the table has no header to hand back,
+    // which the C caller sees as NULL.
+    match table.elements.get(index) {
+        Some(header) => header as *const Header,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the size of the headers table.
+///
+/// headers: Headers table.
+///
+/// Returns the size, or -1 if headers is NULL or the size does not fit in an isize.
+/// # Safety
+/// When calling this method, you have to ensure that headers is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_size(headers: *const Headers) -> isize {
+    match headers.as_ref() {
+        Some(table) => isize::try_from(table.size()).unwrap_or(-1),
+        None => -1,
+    }
+}
+
+/// Get the name of a header.
+///
+/// header: Header pointer.
+///
+/// Returns a pointer to the header's name, or NULL if header is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name(header: *const Header) -> *const Bstr {
+    match header.as_ref() {
+        Some(header) => &header.name as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the name of a header as a ptr.
+///
+/// header: Header pointer.
+///
+/// Returns the pointer obtained from bstr_ptr for the name, or NULL if header is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name_ptr(header: *const Header) -> *const u8 {
+    match header.as_ref() {
+        Some(header) => bstr_ptr(&header.name) as *const u8,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the header flags
+///
+/// header: Header pointer.
+///
+/// Returns the header flags, or 0 if header is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_flags(header: *const Header) -> u64 {
+    header.as_ref().map_or(0, |hdr| hdr.flags)
+}
+
+/// Get the length of a header name.
+///
+/// header: Header pointer.
+///
+/// Returns the length, or -1 if header is NULL or the length does not fit in an isize.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name_len(header: *const Header) -> isize {
+    match header.as_ref() {
+        Some(header) => isize::try_from(header.name.len()).unwrap_or(-1),
+        None => -1,
+    }
+}
+
+/// Get the value of a header.
+///
+/// header: Header pointer.
+///
+/// Returns a pointer to the header's value, or NULL if header is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value(header: *const Header) -> *const Bstr {
+    match header.as_ref() {
+        Some(header) => &header.value as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the value of a header as a ptr.
+///
+/// header: Header pointer.
+///
+/// Returns the pointer obtained from bstr_ptr for the value, or NULL if header is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value_ptr(header: *const Header) -> *const u8 {
+    match header.as_ref() {
+        Some(header) => bstr_ptr(&header.value) as *const u8,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the length of a header value.
+///
+/// header: Header pointer.
+///
+/// Returns the length, or -1 if header is NULL or the length does not fit in an isize.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value_len(header: *const Header) -> isize {
+    match header.as_ref() {
+        Some(header) => isize::try_from(header.value.len()).unwrap_or(-1),
+        None => -1,
+    }
+}
diff --git a/rust/htp/src/c_api/log.rs b/rust/htp/src/c_api/log.rs
new file mode 100644
index 000000000000..801131918718
--- /dev/null
+++ b/rust/htp/src/c_api/log.rs
@@ -0,0 +1,53 @@
+#![deny(missing_docs)]
+use crate::log::{HtpLogCode, Log};
+use std::{ffi::CString, os::raw::c_char};
+
+/// Get the log's message string
+///
+/// Returns a copy of the log message as a cstring, or NULL if log is NULL or the message contains a NUL byte
+/// The caller must free this result with htp_free_cstring
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_message(log: *const Log) -> *mut c_char {
+    match log.as_ref().map(|log| CString::new(log.msg.msg.clone())) {
+        Some(Ok(message)) => message.into_raw(),
+        _ => std::ptr::null_mut(),
+    }
+}
+
+/// Get a log's message file
+///
+/// Returns a copy of the file as a cstring, or NULL if log is NULL or the file name contains a NUL byte
+/// The caller must free this result with htp_free_cstring
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_file(log: *const Log) -> *mut c_char {
+    match log.as_ref().map(|log| CString::new(log.msg.file.clone())) {
+        Some(Ok(file)) => file.into_raw(),
+        _ => std::ptr::null_mut(),
+    }
+}
+
+/// Get a log's message code
+///
+/// Returns a code or HTP_LOG_CODE_ERROR on error
+///
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_code(log: *const Log) -> HtpLogCode {
+    log.as_ref()
+        .map_or(HtpLogCode::ERROR, |entry| entry.msg.code)
+}
+
+/// Free a log that was handed out as a raw pointer; a NULL pointer is ignored.
+/// # Safety
+/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_free(log: *mut Log) {
+    if !log.is_null() {
+        drop(Box::from_raw(log));
+    }
+}
diff --git a/rust/htp/src/c_api/mod.rs b/rust/htp/src/c_api/mod.rs
new file mode 100644
index 000000000000..bc5c982a2304
--- /dev/null
+++ b/rust/htp/src/c_api/mod.rs
@@ -0,0 +1,35 @@
+#![deny(missing_docs)]
+use crate::util::get_version;
+use std::ffi::CString;
+
+/// Functions for working with Bstr.
+pub mod bstr;
+/// Functions for working with config.
+pub mod config;
+/// Functions for working with connection.
+pub mod connection;
+/// Functions for working with connection parser.
+pub mod connection_parser;
+/// Functions for working with headers.
+pub mod header;
+/// Functions for working with logs.
+pub mod log;
+/// Functions for working with transactions.
+pub mod transaction;
+/// Functions for working with request uri.
+pub mod uri;
+
+/// Returns a pointer to the LibHTP version string. NOTE(review): assumes get_version() data is NUL-terminated - confirm.
+#[no_mangle]
+pub extern "C" fn htp_get_version() -> *const libc::c_char {
+    get_version().as_ptr() as *const libc::c_char
+}
+
+/// Free rust allocated cstring; a NULL pointer is ignored.
+/// # Safety
+/// This should only ever be called with a pointer that was earlier obtained by calling [CString::into_raw].
+#[no_mangle]
+pub unsafe extern "C" fn htp_free_cstring(input: *mut libc::c_char) {
+    // Rebuilding the CString takes ownership back; dropping it releases the memory.
+    drop(input.as_mut().map(|input| CString::from_raw(input)));
+}
diff --git a/rust/htp/src/c_api/transaction.rs b/rust/htp/src/c_api/transaction.rs
new file mode 100644
index 000000000000..c5578890db0b
--- /dev/null
+++ b/rust/htp/src/c_api/transaction.rs
@@ -0,0 +1,757 @@
+use crate::{
+ bstr::Bstr, c_api::header::htp_headers_get, config::Config,
+ connection_parser::ConnectionParser, decompressors::HtpContentEncoding,
+ hook::DataExternalCallbackFn, request::HtpMethod, transaction::*, uri::Uri,
+};
+use std::{
+ convert::{TryFrom, TryInto},
+ rc::Rc,
+};
+
+/// Destroys the supplied transaction by removing it, by index, from connp. Does nothing if either pointer is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that connp and tx are each either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_destroy(connp: *mut ConnectionParser, tx: *const Transaction) {
+    if let (Some(connp), Some(tx)) = (connp.as_mut(), tx.as_ref()) {
+        connp.remove_tx(tx.index)
+    }
+}
+
+/// Get a transaction's normalized parsed uri.
+///
+/// tx: Transaction pointer.
+/// Returns the complete or partial normalized uri (per the decoder config), or NULL if tx is NULL or it is unset.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_normalized_uri(tx: *const Transaction) -> *const Bstr {
+    // Check for NULL before reading the config: `(*tx).cfg` on a NULL tx is undefined behavior.
+    tx.as_ref()
+        .and_then(|tx| {
+            if tx.cfg.decoder_cfg.normalized_uri_include_all {
+                tx.complete_normalized_uri.as_ref()
+            } else {
+                tx.partial_normalized_uri.as_ref()
+            }
+        })
+        .map_or(std::ptr::null(), |uri| uri as *const Bstr)
+}
+
+/// Get the transaction's configuration.
+///
+/// tx: Transaction pointer.
+///
+/// Returns a pointer to the configuration or NULL on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_cfg(tx: *const Transaction) -> *const Config {
+    tx.as_ref()
+        .map_or(std::ptr::null(), |tx| Rc::as_ptr(&tx.cfg))
+}
+
+/// Returns the user data associated with this transaction, or NULL if tx is NULL or no user data is available.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_get_user_data(tx: *const Transaction) -> *mut libc::c_void {
+    match tx.as_ref().and_then(|tx| tx.user_data::<*mut libc::c_void>()) {
+        Some(user_data) => *user_data,
+        None => std::ptr::null_mut(),
+    }
+}
+
+/// Associates user data with this transaction. Does nothing if tx is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_set_user_data(tx: *mut Transaction, user_data: *mut libc::c_void) {
+    if let Some(tx) = tx.as_mut() {
+        tx.set_user_data(Box::new(user_data))
+    }
+}
+
+/// Get a transaction's request line.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request line, or NULL if tx is NULL or no request line is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_line(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.request_line.as_ref()) {
+        Some(line) => line as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's request method.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request method, or NULL if tx is NULL or no request method is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_method(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.request_method.as_ref()) {
+        Some(method) => method as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the transaction's request method number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request method number or ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_method_number(tx: *const Transaction) -> HtpMethod {
+    tx.as_ref()
+        .map_or(HtpMethod::ERROR, |tx| tx.request_method_number)
+}
+
+/// Get a transaction's request uri.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request uri, or NULL if tx is NULL or no request uri is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_uri(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.request_uri.as_ref()) {
+        Some(uri) => uri as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's request protocol.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol, or NULL if tx is NULL or no request protocol is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_protocol(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.request_protocol.as_ref()) {
+        Some(protocol) => protocol as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's request protocol number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol number or ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_protocol_number(tx: *const Transaction) -> HtpProtocol {
+    tx.as_ref()
+        .map_or(HtpProtocol::ERROR, |tx| tx.request_protocol_number)
+}
+
+/// Get whether a transaction's protocol is version 0.9.
+///
+/// tx: Transaction pointer.
+///
+/// Returns 1 if the version is 0.9 or 0 otherwise. A NULL argument will
+/// also result in a return value of 0.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_is_protocol_0_9(tx: *const Transaction) -> i32 {
+    tx.as_ref().map_or(0, |tx| tx.is_protocol_0_9 as i32)
+}
+
+/// Get whether a transaction contains a successful 101 Switching Protocol response to HTTP/2.0
+///
+/// tx: Transaction pointer.
+///
+/// Returns 1 if the transaction is an HTTP/2.0 upgrade, or 0 otherwise (including when tx is NULL).
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_is_http_2_upgrade(tx: *const Transaction) -> i32 {
+    match tx.as_ref() {
+        Some(tx) => tx.is_http_2_upgrade as i32,
+        None => 0,
+    }
+}
+
+/// Get a transaction's parsed uri.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the parsed uri, or NULL if tx is NULL or no parsed uri is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_parsed_uri(tx: *const Transaction) -> *const Uri {
+    match tx.as_ref().and_then(|tx| tx.parsed_uri.as_ref()) {
+        Some(uri) => uri as *const Uri,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's request headers.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request headers or NULL on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_headers(tx: *const Transaction) -> *const Headers {
+    tx.as_ref()
+        .map_or(std::ptr::null(), |tx| &tx.request_headers as *const Headers)
+}
+
+/// Get a transaction's request headers size.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the size or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_headers_size(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map_or(-1, |tx| isize::try_from(tx.request_headers.size()).unwrap_or(-1))
+}
+
+/// Get the first request header value matching the key from a transaction.
+///
+/// tx: Transaction pointer.
+/// ckey: Header name to match.
+///
+/// Returns the header or NULL when not found or on error
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_header(
+    tx: *const Transaction, ckey: *const libc::c_char,
+) -> *const Header {
+    tx.as_ref()
+        .map_or(std::ptr::null(), |tx| htp_headers_get(&tx.request_headers, ckey))
+}
+
+/// Get the request header at the given index.
+///
+/// tx: Transaction pointer.
+/// index: request header table index.
+///
+/// Returns the header, or NULL if tx is NULL or index is out of range
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_header_index(
+    tx: *const Transaction, index: usize,
+) -> *const Header {
+    let tx = match tx.as_ref() {
+        Some(tx) => tx,
+        None => return std::ptr::null(),
+    };
+    // An index past the end of the table has no header; report it as NULL.
+    match tx.request_headers.elements.get(index) {
+        Some(header) => header as *const Header,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's request transfer coding.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the transfer coding or ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_transfer_coding(
+    tx: *const Transaction,
+) -> HtpTransferCoding {
+    tx.as_ref()
+        .map_or(HtpTransferCoding::ERROR, |tx| tx.request_transfer_coding)
+}
+
+/// Get a transaction's request content encoding.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the content encoding or ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_content_encoding(
+    tx: *const Transaction,
+) -> HtpContentEncoding {
+    tx.as_ref()
+        .map_or(HtpContentEncoding::ERROR, |tx| tx.request_content_encoding)
+}
+
+/// Get a transaction's request content type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the content type, or NULL if tx is NULL or no content type is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_content_type(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.request_content_type.as_ref()) {
+        Some(content_type) => content_type as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's request content length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the content length, or -1 if tx is NULL, no content length is
+/// set, or the length does not fit in an i64.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_content_length(tx: *const Transaction) -> i64 {
+    let length = tx.as_ref().and_then(|tx| tx.request_content_length);
+    match length {
+        Some(len) => i64::try_from(len).unwrap_or(-1),
+        None => -1,
+    }
+}
+
+/// Get the transaction's request authentication type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the auth type or HTP_AUTH_ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_auth_type(tx: *const Transaction) -> HtpAuthType {
+    tx.as_ref()
+        .map_or(HtpAuthType::ERROR, |tx| tx.request_auth_type)
+}
+
+/// Get a transaction's request hostname.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request hostname, or NULL if tx is NULL or no hostname is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_hostname(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.request_hostname.as_ref()) {
+        Some(hostname) => hostname as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the transaction's request port number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request port number, or -1 if tx is NULL or no port number is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_port_number(tx: *const Transaction) -> i32 {
+    match tx.as_ref().and_then(|tx| tx.request_port_number.as_ref()) {
+        Some(port) => *port as i32,
+        None => -1,
+    }
+}
+
+/// Get a transaction's request message length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request message length or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_message_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map_or(-1, |tx| i64::try_from(tx.request_message_len).unwrap_or(-1))
+}
+
+/// Get a transaction's request entity length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request entity length or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_entity_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map_or(-1, |tx| i64::try_from(tx.request_entity_len).unwrap_or(-1))
+}
+
+/// Get a transaction's response line.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response line, or NULL if tx is NULL or no response line is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_line(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.response_line.as_ref()) {
+        Some(line) => line as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's response protocol.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response protocol, or NULL if tx is NULL or no response protocol is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_protocol(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.response_protocol.as_ref()) {
+        Some(protocol) => protocol as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's response protocol number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol number or ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_protocol_number(tx: *const Transaction) -> HtpProtocol {
+    tx.as_ref()
+        .map_or(HtpProtocol::ERROR, |tx| tx.response_protocol_number)
+}
+
+/// Get the transaction's response status.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response status, or NULL if tx is NULL or no response status is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.response_status.as_ref()) {
+        Some(status) => status as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the transaction's response status number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the number of a valid response status, 0 if the status is
+/// unknown, or -1 if the status is invalid or tx is NULL.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status_number(tx: *const Transaction) -> i32 {
+    match tx.as_ref().map(|tx| &tx.response_status_number) {
+        Some(HtpResponseNumber::VALID(status)) => *status as i32,
+        Some(HtpResponseNumber::UNKNOWN) => 0,
+        Some(HtpResponseNumber::INVALID) | None => -1,
+    }
+}
+/// Get the transaction's response status expected number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the expected number or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status_expected_number(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .map_or(-1, |tx| tx.response_status_expected_number as i32)
+}
+
+/// Get a transaction's response message.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response message, or NULL if tx is NULL or no response message is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_message(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.response_message.as_ref()) {
+        Some(message) => message as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's response headers.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response headers or NULL on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_headers(tx: *const Transaction) -> *const Headers {
+    tx.as_ref()
+        .map_or(std::ptr::null(), |tx| &tx.response_headers as *const Headers)
+}
+
+/// Get a transaction's response headers size.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the size or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_headers_size(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map_or(-1, |tx| isize::try_from(tx.response_headers.size()).unwrap_or(-1))
+}
+
+/// Get the first response header value matching the key from a transaction.
+///
+/// tx: Transaction pointer.
+/// ckey: Header name to match.
+///
+/// Returns the header or NULL when not found or on error
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_header(
+    tx: *const Transaction, ckey: *const libc::c_char,
+) -> *const Header {
+    tx.as_ref()
+        .map_or(std::ptr::null(), |tx| htp_headers_get(&tx.response_headers, ckey))
+}
+
+/// Get the response header at the given index.
+///
+/// tx: Transaction pointer.
+/// index: response header table index.
+///
+/// Returns the header, or NULL if tx is NULL or index is out of range
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_header_index(
+    tx: *const Transaction, index: usize,
+) -> *const Header {
+    let tx = match tx.as_ref() {
+        Some(tx) => tx,
+        None => return std::ptr::null(),
+    };
+    // An index past the end of the table has no header; report it as NULL.
+    match tx.response_headers.elements.get(index) {
+        Some(header) => header as *const Header,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get a transaction's response message length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response message length or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_message_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map_or(-1, |tx| i64::try_from(tx.response_message_len).unwrap_or(-1))
+}
+
+/// Get a transaction's response entity length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response entity length or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_entity_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map_or(-1, |tx| i64::try_from(tx.response_entity_len).unwrap_or(-1))
+}
+
+/// Get a transaction's response content length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response content length, or -1 if tx is NULL, no content
+/// length is set, or the length does not fit in an i64.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_content_length(tx: *const Transaction) -> i64 {
+    let length = tx.as_ref().and_then(|tx| tx.response_content_length);
+    match length {
+        Some(len) => i64::try_from(len).unwrap_or(-1),
+        None => -1,
+    }
+}
+
+/// Get a transaction's response content type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response content type, or NULL if tx is NULL or no content type is set.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_content_type(tx: *const Transaction) -> *const Bstr {
+    match tx.as_ref().and_then(|tx| tx.response_content_type.as_ref()) {
+        Some(content_type) => content_type as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Get the transaction's bit flags.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the flags represented as an integer or 0 if the flags are empty
+/// or a NULL ptr is passed as an argument.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_flags(tx: *const Transaction) -> u64 {
+    tx.as_ref().map_or(0, |tx| tx.flags)
+}
+
+/// Get the transaction's request progress.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the progress or HTP_REQUEST_ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_progress(tx: *const Transaction) -> HtpRequestProgress {
+    tx.as_ref()
+        .map_or(HtpRequestProgress::ERROR, |tx| tx.request_progress)
+}
+
+/// Get the transaction's response progress.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the progress or ERROR on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_progress(tx: *const Transaction) -> HtpResponseProgress {
+    tx.as_ref()
+        .map_or(HtpResponseProgress::ERROR, |tx| tx.response_progress)
+}
+
+/// Get the transaction's index.
+///
+/// tx: Transaction pointer.
+///
+/// Returns an index or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_index(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map_or(-1, |tx| isize::try_from(tx.index).unwrap_or(-1))
+}
+
+/// Register callback for the transaction-specific RESPONSE_BODY_DATA hook. Does nothing if tx is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_register_response_body_data(
+    tx: *mut Transaction, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(tx) = tx.as_mut() {
+        tx.hook_response_body_data.register_extern(cbk_fn)
+    }
+}
+
+/// Get the data's transaction.
+///
+/// Returns the transaction or NULL on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_tx(data: *const Data) -> *const Transaction {
+    data.as_ref()
+        .map_or(std::ptr::null(), |data| data.tx() as *const Transaction)
+}
+
+/// Get the data pointer.
+///
+/// Returns the data or NULL on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_data(data: *const Data) -> *const u8 {
+    let chunk = data.as_ref();
+    chunk.map_or(std::ptr::null(), |chunk| chunk.data())
+}
+
+/// Get the length of the data.
+///
+/// Returns the length or -1 on error.
+///
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_len(data: *const Data) -> isize {
+    data.as_ref()
+        .map_or(-1, |data| isize::try_from(data.len()).unwrap_or(-1))
+}
+
+/// Get whether this data is empty.
+///
+/// Returns true if data is NULL or zero-length.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_is_empty(data: *const Data) -> bool {
+    data.as_ref().map_or(true, |data| data.is_empty())
+}
diff --git a/rust/htp/src/c_api/uri.rs b/rust/htp/src/c_api/uri.rs
new file mode 100644
index 000000000000..a51f00ca916e
--- /dev/null
+++ b/rust/htp/src/c_api/uri.rs
@@ -0,0 +1,118 @@
+use crate::{bstr::Bstr, uri::Uri};
+
+/// Borrow the scheme component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no scheme.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_scheme(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.scheme.as_ref()) {
+        Some(scheme) => scheme as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Borrow the username component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no username.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_username(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.username.as_ref()) {
+        Some(username) => username as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Borrow the password component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no password.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_password(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.password.as_ref()) {
+        Some(password) => password as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Borrow the hostname component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no hostname.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_hostname(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.hostname.as_ref()) {
+        Some(hostname) => hostname as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Borrow the textual port component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no port.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_port(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.port.as_ref()) {
+        Some(port) => port as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Read the numeric port of a uri.
+///
+/// Yields -1 when `uri` is NULL or has no port_number set.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_port_number(uri: *const Uri) -> i32 {
+    match uri.as_ref().and_then(|u| u.port_number) {
+        Some(number) => number as i32,
+        None => -1,
+    }
+}
+
+/// Borrow the path component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no path.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_path(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.path.as_ref()) {
+        Some(path) => path as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Borrow the query component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no query.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_query(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.query.as_ref()) {
+        Some(query) => query as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
+
+/// Borrow the fragment component of a uri.
+///
+/// Yields NULL when `uri` is NULL or carries no fragment.
+/// # Safety
+/// `uri` must be NULL or point to a properly initialized `Uri`.
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_fragment(uri: *const Uri) -> *const Bstr {
+    match uri.as_ref().and_then(|u| u.fragment.as_ref()) {
+        Some(fragment) => fragment as *const Bstr,
+        None => std::ptr::null(),
+    }
+}
diff --git a/rust/htp/src/config.rs b/rust/htp/src/config.rs
new file mode 100644
index 000000000000..ceb103d13165
--- /dev/null
+++ b/rust/htp/src/config.rs
@@ -0,0 +1,636 @@
+use crate::decompressors::Options;
+use crate::{
+ error::Result,
+ hook::{
+ DataHook, DataNativeCallbackFn, LogHook, LogNativeCallbackFn, TxHook, TxNativeCallbackFn,
+ },
+ log::HtpLogLevel,
+ transaction::Param,
+ unicode_bestfit_map::UnicodeBestfitMap,
+ HtpStatus,
+};
+
+/// Configuration for libhtp parsing.
+#[derive(Clone)]
+pub struct Config {
+    /// The maximum size of the buffer that is used when the current
+    /// input chunk does not contain all the necessary data (e.g., a header
+    /// line that spans several packets).
+    pub field_limit: usize,
+    /// Log level, which will be used when deciding whether to store or
+    /// ignore the messages issued by the parser.
+    pub log_level: HtpLogLevel,
+    /// Whether to delete each transaction after the last hook is invoked. This
+    /// feature should be used when parsing traffic streams in real time.
+    pub tx_auto_destroy: bool,
+    /// Server personality identifier.
+    pub server_personality: HtpServerPersonality,
+    /// The function to use to transform parameters after parsing.
+    pub parameter_processor: Option<fn(&mut Param) -> Result<()>>,
+    /// Decoder configuration for url path.
+    pub decoder_cfg: DecoderConfig,
+    /// Whether to decompress compressed response bodies.
+    pub response_decompression_enabled: bool,
+    /// Whether to parse urlencoded data.
+    pub parse_urlencoded: bool,
+    /// Whether to parse HTTP Authentication headers.
+    pub parse_request_auth: bool,
+    /// Request start hook, invoked when the parser receives the first byte of a new
+    /// request. Because an HTTP transaction always starts with a request, this hook
+    /// doubles as a transaction start hook.
+    pub hook_request_start: TxHook,
+    /// Request line hook, invoked after a request line has been parsed.
+    pub hook_request_line: TxHook,
+    /// Request URI normalization hook, for overriding default normalization of URI.
+    pub hook_request_uri_normalize: TxHook,
+    /// Receives raw request header data, starting immediately after the request line,
+    /// including all headers as they are seen on the TCP connection, and including the
+    /// terminating empty line. Not available on genuine HTTP/0.9 requests (because
+    /// they don't use headers).
+    pub hook_request_header_data: DataHook,
+    /// Request headers hook, invoked after all request headers are seen.
+    pub hook_request_headers: TxHook,
+    /// Request body data hook, invoked every time body data is available. Each
+    /// invocation will provide a Data instance. Chunked data
+    /// will be dechunked before the data is passed to this hook. Decompression
+    /// is not currently implemented. At the end of the request body
+    /// there will be a call with the data set to None.
+    pub hook_request_body_data: DataHook,
+    /// Receives raw request trailer data, which can be available on requests that have
+    /// chunked bodies. The data starts immediately after the zero-length chunk
+    /// and includes the terminating empty line.
+    pub hook_request_trailer_data: DataHook,
+    /// Request trailer hook, invoked after all trailer headers are seen,
+    /// and if they are seen (not invoked otherwise).
+    pub hook_request_trailer: TxHook,
+    /// Request hook, invoked after a complete request is seen.
+    pub hook_request_complete: TxHook,
+    /// Response startup hook, invoked when a response transaction is found and
+    /// processing started.
+    pub hook_response_start: TxHook,
+    /// Response line hook, invoked after a response line has been parsed.
+    pub hook_response_line: TxHook,
+    /// Receives raw response header data, starting immediately after the status line
+    /// and including all headers as they are seen on the TCP connection, and including the
+    /// terminating empty line. Not available on genuine HTTP/0.9 responses (because
+    /// they don't have response headers).
+    pub hook_response_header_data: DataHook,
+    /// Response headers hook, invoked after all response headers have been seen.
+    pub hook_response_headers: TxHook,
+    /// Response body data hook, invoked every time body data is available. Each
+    /// invocation will provide a Data instance. Chunked data
+    /// will be dechunked before the data is passed to this hook. By default,
+    /// compressed data will be decompressed, but decompression can be disabled
+    /// in configuration. At the end of the response body there will be a call
+    /// with the data pointer set to NULL.
+    pub hook_response_body_data: DataHook,
+    /// Receives raw response trailer data, which can be available on responses that have
+    /// chunked bodies. The data starts immediately after the zero-length chunk
+    /// and includes the terminating empty line.
+    pub hook_response_trailer_data: DataHook,
+    /// Response trailer hook, invoked after all trailer headers have been processed,
+    /// and only if the trailer exists.
+    pub hook_response_trailer: TxHook,
+    /// Response hook, invoked after a response has been seen. Because sometimes servers
+    /// respond before receiving complete requests, a response_complete callback may be
+    /// invoked prior to a request_complete callback.
+    pub hook_response_complete: TxHook,
+    /// Transaction complete hook, which is invoked once the entire transaction is
+    /// considered complete (request and response are both complete). This is always
+    /// the last hook to be invoked.
+    pub hook_transaction_complete: TxHook,
+    /// Log hook, invoked every time the library wants to log.
+    pub hook_log: LogHook,
+    /// Reaction to leading whitespace on the request line
+    pub requestline_leading_whitespace_unwanted: HtpUnwanted,
+    /// Whether to decompress compressed request bodies.
+    pub request_decompression_enabled: bool,
+    /// Configuration options for decompression.
+    pub compression_options: Options,
+    /// Flush incomplete transactions
+    pub flush_incomplete: bool,
+    /// Maximum number of transactions
+    pub max_tx: u32,
+    /// Maximum number of headers
+    pub number_headers_limit: u32,
+}
+
+impl Default for Config {
+ fn default() -> Self {
+ Self {
+ field_limit: 18000,
+ log_level: HtpLogLevel::NOTICE,
+ tx_auto_destroy: false,
+ server_personality: HtpServerPersonality::MINIMAL,
+ parameter_processor: None,
+ decoder_cfg: Default::default(),
+ response_decompression_enabled: true,
+ parse_urlencoded: false,
+ parse_request_auth: true,
+ hook_request_start: TxHook::default(),
+ hook_request_line: TxHook::default(),
+ hook_request_uri_normalize: TxHook::default(),
+ hook_request_header_data: DataHook::default(),
+ hook_request_headers: TxHook::default(),
+ hook_request_body_data: DataHook::default(),
+ hook_request_trailer_data: DataHook::default(),
+ hook_request_trailer: TxHook::default(),
+ hook_request_complete: TxHook::default(),
+ hook_response_start: TxHook::default(),
+ hook_response_line: TxHook::default(),
+ hook_response_header_data: DataHook::default(),
+ hook_response_headers: TxHook::default(),
+ hook_response_body_data: DataHook::default(),
+ hook_response_trailer_data: DataHook::default(),
+ hook_response_trailer: TxHook::default(),
+ hook_response_complete: TxHook::default(),
+ hook_transaction_complete: TxHook::default(),
+ hook_log: LogHook::default(),
+ requestline_leading_whitespace_unwanted: HtpUnwanted::IGNORE,
+ request_decompression_enabled: false,
+ compression_options: Options::default(),
+ flush_incomplete: false,
+ max_tx: 512,
+ number_headers_limit: 1024,
+ }
+ }
+}
+
+/// Configuration options for decoding.
+#[derive(Copy, Clone)]
+pub struct DecoderConfig {
+ /// Whether to double decode the path in the normalized uri
+ pub double_decode_normalized_path: bool,
+ /// Whether to double decode the query in the normalized uri
+ pub double_decode_normalized_query: bool,
+ // Path-specific decoding options.
+ /// Convert backslash characters to slashes.
+ pub backslash_convert_slashes: bool,
+ /// Convert to lowercase.
+ pub convert_lowercase: bool,
+ /// Compress slash characters.
+ pub path_separators_compress: bool,
+ /// Should we URL-decode encoded path segment separators?
+ pub path_separators_decode: bool,
+ /// Should we decode '+' characters to spaces?
+ pub plusspace_decode: bool,
+ /// Reaction to encoded path separators.
+ pub path_separators_encoded_unwanted: HtpUnwanted,
+ // Special characters options.
+ /// Controls how raw NUL bytes are handled.
+ pub nul_raw_terminates: bool,
+ /// Determines server response to a raw NUL byte in the path.
+ pub nul_raw_unwanted: HtpUnwanted,
+ /// Reaction to control characters.
+ pub control_chars_unwanted: HtpUnwanted,
+ /// Allow whitespace characters in request uri path
+ pub allow_space_uri: bool,
+ // URL encoding options.
+ /// Should we decode %u-encoded characters?
+ pub u_encoding_decode: bool,
+ /// Reaction to %u encoding.
+ pub u_encoding_unwanted: HtpUnwanted,
+ /// Handling of invalid URL encodings.
+ pub url_encoding_invalid_handling: HtpUrlEncodingHandling,
+ /// Reaction to invalid URL encoding.
+ pub url_encoding_invalid_unwanted: HtpUnwanted,
+ /// Controls how encoded NUL bytes are handled.
+ pub nul_encoded_terminates: bool,
+ /// How are we expected to react to an encoded NUL byte?
+ pub nul_encoded_unwanted: HtpUnwanted,
+ // Normalized URI preference
+ /// Controls whether the client wants the complete or partial normalized URI.
+ pub normalized_uri_include_all: bool,
+ // UTF-8 options.
+ /// Controls how invalid UTF-8 characters are handled.
+ pub utf8_invalid_unwanted: HtpUnwanted,
+ /// Convert UTF-8 characters into bytes using best-fit mapping.
+ pub utf8_convert_bestfit: bool,
+ /// Best-fit map for UTF-8 decoding.
+ pub bestfit_map: UnicodeBestfitMap,
+}
+
+impl Default for DecoderConfig {
+ fn default() -> Self {
+ Self {
+ double_decode_normalized_path: false,
+ double_decode_normalized_query: false,
+ backslash_convert_slashes: false,
+ convert_lowercase: false,
+ path_separators_compress: false,
+ path_separators_decode: false,
+ plusspace_decode: true,
+ path_separators_encoded_unwanted: HtpUnwanted::IGNORE,
+ nul_raw_terminates: false,
+ nul_raw_unwanted: HtpUnwanted::IGNORE,
+ control_chars_unwanted: HtpUnwanted::IGNORE,
+ allow_space_uri: false,
+ u_encoding_decode: false,
+ u_encoding_unwanted: HtpUnwanted::IGNORE,
+ url_encoding_invalid_handling: HtpUrlEncodingHandling::PRESERVE_PERCENT,
+ url_encoding_invalid_unwanted: HtpUnwanted::IGNORE,
+ nul_encoded_terminates: false,
+ nul_encoded_unwanted: HtpUnwanted::IGNORE,
+ normalized_uri_include_all: false,
+ utf8_invalid_unwanted: HtpUnwanted::IGNORE,
+ utf8_convert_bestfit: false,
+ bestfit_map: UnicodeBestfitMap::default(),
+ }
+ }
+}
+
+/// Enumerates the possible server personalities.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpServerPersonality {
+ /// Minimal personality that performs as little work as possible. All optional
+ /// features are disabled. This personality is a good starting point for customization.
+ MINIMAL,
+ /// A generic personality that aims to work reasonably well for all server types.
+ GENERIC,
+ /// The IDS personality tries to perform as much decoding as possible.
+ IDS,
+ /// Mimics the behavior of IIS 4.0, as shipped with Windows NT 4.0.
+ IIS_4_0,
+ /// Mimics the behavior of IIS 5.0, as shipped with Windows 2000.
+ IIS_5_0,
+ /// Mimics the behavior of IIS 5.1, as shipped with Windows XP Professional.
+ IIS_5_1,
+ /// Mimics the behavior of IIS 6.0, as shipped with Windows 2003.
+ IIS_6_0,
+ /// Mimics the behavior of IIS 7.0, as shipped with Windows 2008.
+ IIS_7_0,
+ /// Mimics the behavior of IIS 7.5, as shipped with Windows 7.
+ IIS_7_5,
+ /// Mimics the behavior of Apache 2.x.
+ APACHE_2,
+}
+
+/// Enumerates the ways in which servers respond to malformed data.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpUnwanted {
+ /// Ignores problem.
+ IGNORE,
+ /// Responds with HTTP 400 status code.
+ CODE_400 = 400,
+ /// Responds with HTTP 404 status code.
+ CODE_404 = 404,
+}
+
+/// Enumerates the possible approaches to handling invalid URL-encodings.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpUrlEncodingHandling {
+ /// Ignore invalid URL encodings and leave the % in the data.
+ PRESERVE_PERCENT,
+ /// Ignore invalid URL encodings, but remove the % from the data.
+ REMOVE_PERCENT,
+ /// Decode invalid URL encodings.
+ PROCESS_INVALID,
+}
+
+impl Config {
+ /// Registers a callback that is invoked every time there is a log message with
+ /// severity equal to or higher than the configured log level.
+ pub fn register_log(&mut self, cbk_fn: LogNativeCallbackFn) {
+ self.hook_log.register(cbk_fn);
+ }
+
+ /// Registers a request_complete callback, which is invoked after a
+ /// complete request has been seen.
+ pub fn register_request_complete(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_request_complete.register(cbk_fn);
+ }
+
+ /// Registers a request_body_data callback, which is invoked whenever we see
+ /// bytes of request body data.
+ pub fn register_request_body_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+ self.hook_request_body_data.register(cbk_fn);
+ }
+
+ /// Registers a request_header_data callback, which is invoked when we see header
+ /// data. This callback receives raw header data as seen on the connection, including
+ /// the terminating line and anything seen after the request line.
+ pub fn register_request_header_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+ self.hook_request_header_data.register(cbk_fn);
+ }
+
+ /// Registers a request_headers callback, which is invoked after we see all the
+ /// request headers.
+ pub fn register_request_headers(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_request_headers.register(cbk_fn);
+ }
+
+ /// Registers a request_line callback, which is invoked after we parse the entire
+ /// request line.
+ pub fn register_request_line(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_request_line.register(cbk_fn);
+ }
+
+ /// Registers a request_start callback, which is invoked every time a new
+ /// request begins and before any parsing is done.
+ pub fn register_request_start(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_request_start.register(cbk_fn);
+ }
+
+ /// Registers a request_trailer callback, which is invoked when all trailer headers
+ /// are seen, if present.
+ pub fn register_request_trailer(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_request_trailer.register(cbk_fn);
+ }
+
+ /// Registers a request_trailer_data callback, which may be invoked on requests with
+ /// chunked bodies. This callback receives the raw request trailer data after the zero-length
+ /// chunk including the terminating line.
+ pub fn register_request_trailer_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+ self.hook_request_trailer_data.register(cbk_fn);
+ }
+
+ /// Registers a response_body_data callback, which is invoked whenever we see
+ /// bytes of response body data.
+ pub fn register_response_body_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+ self.hook_response_body_data.register(cbk_fn);
+ }
+
+ /// Registers a response_complete callback, which is invoked after a
+ /// complete response has been seen.
+ pub fn register_response_complete(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_response_complete.register(cbk_fn);
+ }
+
+ /// Registers a response_header_data callback, which is invoked when we see header
+ /// data. This callback receives raw header data as seen on the connection, including
+ /// the terminating line and anything seen after the response line.
+ pub fn register_response_header_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+ self.hook_response_header_data.register(cbk_fn);
+ }
+
+ /// Registers a response_headers callback, which is invoked after we see all the
+ /// response headers.
+ #[allow(dead_code)]
+ pub fn register_response_headers(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_response_headers.register(cbk_fn);
+ }
+
+ /// Registers a response_line callback, which is invoked after we parse the entire
+ /// response line.
+ #[allow(dead_code)]
+ pub fn register_response_line(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_response_line.register(cbk_fn);
+ }
+
+ /// Registers a response_start callback, which is invoked when we see the
+ /// first bytes of data from a response.
+ pub fn register_response_start(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_response_start.register(cbk_fn);
+ }
+
+ /// Registers a response_trailer callback, which is invoked when all
+ /// trailer headers are seen, if present.
+ pub fn register_response_trailer(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_response_trailer.register(cbk_fn);
+ }
+
+ /// Registers a response_trailer_data callback, which may be invoked on responses with
+ /// chunked bodies. This callback receives the raw response trailer data after the zero-length
+ /// chunk and including the terminating line.
+ pub fn register_response_trailer_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+ self.hook_response_trailer_data.register(cbk_fn);
+ }
+
+ /// Registers a transaction_complete callback, which is invoked once the request and response
+ /// are both complete.
+ pub fn register_transaction_complete(&mut self, cbk_fn: TxNativeCallbackFn) {
+ self.hook_transaction_complete.register(cbk_fn);
+ }
+
+ /// Enable or disable the double decoding of the path in the normalized uri
+ pub fn set_double_decode_normalized_path(&mut self, double_decode_normalized_path: bool) {
+ self.decoder_cfg.double_decode_normalized_path = double_decode_normalized_path;
+ }
+
+ /// Enable or disable the double decoding of the query in the normalized uri
+ pub fn set_double_decode_normalized_query(&mut self, double_decode_normalized_query: bool) {
+ self.decoder_cfg.double_decode_normalized_query = double_decode_normalized_query;
+ }
+
+ /// Enable or disable the built-in Urlencoded parser. Disabled by default.
+ /// The parser will parse query strings and request bodies with the appropriate MIME type.
+ pub fn set_parse_urlencoded(&mut self, parse_urlencoded: bool) {
+ self.parse_urlencoded = parse_urlencoded;
+ }
+
+ /// Configures the maximum size of the buffer LibHTP will use when all data is not available
+ /// in the current buffer (e.g., a very long header line that might span several packets). This
+ /// limit is controlled by the field_limit parameter.
+ pub fn set_field_limit(&mut self, field_limit: usize) {
+ self.field_limit = field_limit;
+ }
+
+ /// Enable or disable spaces in URIs. Disabled by default.
+ pub fn set_allow_space_uri(&mut self, allow_space: bool) {
+ self.decoder_cfg.allow_space_uri = allow_space;
+ }
+
+ /// Configure desired server personality by applying its presets on top of current settings.
+ /// Returns an Error, leaving the config untouched, for IIS_4_0 and IIS_5_0 (no presets here).
+ pub fn set_server_personality(&mut self, personality: HtpServerPersonality) -> Result<()> {
+ match personality {
+ HtpServerPersonality::MINIMAL => {}
+ HtpServerPersonality::GENERIC => {
+ self.set_backslash_convert_slashes(true);
+ self.set_path_separators_decode(true);
+ self.set_path_separators_compress(true);
+ }
+ HtpServerPersonality::IDS => {
+ self.set_backslash_convert_slashes(true);
+ self.set_path_separators_decode(true);
+ self.set_path_separators_compress(true);
+ self.set_convert_lowercase(true);
+ self.set_utf8_convert_bestfit(true);
+ self.set_u_encoding_decode(true);
+ self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE);
+ }
+ HtpServerPersonality::APACHE_2 => {
+ self.set_backslash_convert_slashes(false);
+ self.set_path_separators_decode(false);
+ self.set_path_separators_compress(true);
+ self.set_u_encoding_decode(false);
+ self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+ self.set_url_encoding_invalid_unwanted(HtpUnwanted::CODE_400);
+ self.set_control_chars_unwanted(HtpUnwanted::IGNORE);
+ self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::CODE_400);
+ }
+ HtpServerPersonality::IIS_5_1 => {
+ self.set_backslash_convert_slashes(true);
+ self.set_path_separators_decode(true);
+ self.set_path_separators_compress(true);
+ self.set_u_encoding_decode(false);
+ self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+ self.set_control_chars_unwanted(HtpUnwanted::IGNORE);
+ self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE);
+ }
+ HtpServerPersonality::IIS_6_0 => {
+ self.set_backslash_convert_slashes(true);
+ self.set_path_separators_decode(true);
+ self.set_path_separators_compress(true);
+ self.set_u_encoding_decode(true);
+ self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+ self.set_u_encoding_unwanted(HtpUnwanted::CODE_400);
+ self.set_control_chars_unwanted(HtpUnwanted::CODE_400);
+ self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE);
+ }
+ HtpServerPersonality::IIS_7_0 | HtpServerPersonality::IIS_7_5 => {
+ self.set_backslash_convert_slashes(true);
+ self.set_path_separators_decode(true);
+ self.set_path_separators_compress(true);
+ self.set_u_encoding_decode(true);
+ self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+ self.set_url_encoding_invalid_unwanted(HtpUnwanted::CODE_400);
+ self.set_control_chars_unwanted(HtpUnwanted::CODE_400);
+ self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE);
+ }
+ _ => return Err(HtpStatus::ERROR),
+ }
+ // Remember the personality
+ self.server_personality = personality;
+ Ok(())
+ }
+
+ /// Configures whether transactions will be automatically destroyed once they
+ /// are processed and all callbacks invoked. This option is appropriate for
+ /// programs that handle transactions in real time, as the traffic is parsed.
+ pub fn set_tx_auto_destroy(&mut self, tx_auto_destroy: bool) {
+ self.tx_auto_destroy = tx_auto_destroy;
+ }
+
+ /// Configures whether incomplete transactions will be flushed when a connection is closed.
+ ///
+ /// This will invoke the transaction complete callback for each incomplete transaction. The
+ /// transactions passed to the callback will not have their request and response state set
+ /// to complete - they will simply be passed with the state they have within the parser at
+ /// the time of the call.
+ ///
+ /// This option is intended to be used when a connection is closing and we want to process
+ /// any incomplete transactions that were in flight, or which never completed due to packet
+ /// loss or parsing errors.
+ ///
+ /// These transactions will also be removed from the parser when auto destroy is enabled.
+ pub fn set_flush_incomplete(&mut self, flush_incomplete: bool) {
+ self.flush_incomplete = flush_incomplete;
+ }
+
+ /// Configures a best-fit map, which is used whenever characters longer than one byte
+ /// need to be converted to a single-byte. By default a Windows 1252 best-fit map is used.
+ pub fn set_bestfit_map(&mut self, map: UnicodeBestfitMap) {
+ self.decoder_cfg.bestfit_map = map;
+ }
+
+ /// Sets the replacement character that will be used in the lossy best-fit
+ /// mapping from multi-byte to single-byte streams. The question mark character
+ /// is used as the default replacement byte.
+ pub fn set_bestfit_replacement_byte(&mut self, b: u8) {
+ self.decoder_cfg.bestfit_map.replacement_byte = b;
+ }
+
+ /// Configures how the server handles invalid URL encoding.
+ pub fn set_url_encoding_invalid_handling(&mut self, handling: HtpUrlEncodingHandling) {
+ self.decoder_cfg.url_encoding_invalid_handling = handling;
+ }
+
+ /// Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings.
+ pub fn set_nul_raw_terminates(&mut self, enabled: bool) {
+ self.decoder_cfg.nul_raw_terminates = enabled;
+ }
+
+ /// Configures how the server reacts to encoded NUL bytes. Some servers will stop
+ /// at NUL, while some will respond with 400 or 404. When the termination option is not
+ /// used, the NUL byte will remain in the path.
+ pub fn set_nul_encoded_terminates(&mut self, enabled: bool) {
+ self.decoder_cfg.nul_encoded_terminates = enabled;
+ }
+
+ /// Configures whether %u-encoded sequences are decoded. Such sequences
+ /// will be treated as invalid URL encoding if decoding is not desirable.
+ pub fn set_u_encoding_decode(&mut self, enabled: bool) {
+ self.decoder_cfg.u_encoding_decode = enabled;
+ }
+
+ /// Configures whether backslash characters are treated as path segment separators. They
+ /// are not on Unix systems, but are on Windows systems. If this setting is enabled, a path
+ /// such as "/one\two/three" will be converted to "/one/two/three".
+ pub fn set_backslash_convert_slashes(&mut self, enabled: bool) {
+ self.decoder_cfg.backslash_convert_slashes = enabled;
+ }
+
+ /// Configures whether encoded path segment separators will be decoded. Apache does not do
+ /// this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized
+ /// to "/one/two". If the backslash_separators option is also enabled, encoded backslash
+ /// characters will be converted too (and subsequently normalized to forward slashes).
+ pub fn set_path_separators_decode(&mut self, enabled: bool) {
+ self.decoder_cfg.path_separators_decode = enabled;
+ }
+
+ /// Configures whether consecutive path segment separators will be compressed. When enabled, a path
+ /// such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator
+ /// decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four"
+ /// will be converted to "/one/two/three/four" (assuming all 3 options are enabled).
+ pub fn set_path_separators_compress(&mut self, enabled: bool) {
+ self.decoder_cfg.path_separators_compress = enabled;
+ }
+
+ /// Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This
+ /// is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding
+ /// is taking place.
+ pub fn set_plusspace_decode(&mut self, enabled: bool) {
+ self.decoder_cfg.plusspace_decode = enabled;
+ }
+
+ /// Configures whether input data will be converted to lowercase. Useful for handling servers with
+ /// case-insensitive filesystems.
+ pub fn set_convert_lowercase(&mut self, enabled: bool) {
+ self.decoder_cfg.convert_lowercase = enabled;
+ }
+
+ /// Controls whether the data should be treated as UTF-8 and converted to a single-byte
+ /// stream using best-fit mapping.
+ pub fn set_utf8_convert_bestfit(&mut self, enabled: bool) {
+ self.decoder_cfg.utf8_convert_bestfit = enabled;
+ }
+
+ /// Configures reaction to %u-encoded sequences in input data.
+ pub fn set_u_encoding_unwanted(&mut self, unwanted: HtpUnwanted) {
+ self.decoder_cfg.u_encoding_unwanted = unwanted;
+ }
+
+ /// Controls reaction to raw control characters in the data.
+ pub fn set_control_chars_unwanted(&mut self, unwanted: HtpUnwanted) {
+ self.decoder_cfg.control_chars_unwanted = unwanted;
+ }
+
+ /// Controls whether to use complete or partial URI normalization
+ pub fn set_normalized_uri_include_all(&mut self, set: bool) {
+ self.decoder_cfg.normalized_uri_include_all = set;
+ }
+
+ /// Configures how the server reacts to invalid URL encoding.
+ pub fn set_url_encoding_invalid_unwanted(&mut self, unwanted: HtpUnwanted) {
+ self.decoder_cfg.url_encoding_invalid_unwanted = unwanted;
+ }
+
+ /// Configures how the server reacts to leading whitespace on the request line.
+ pub fn set_requestline_leading_whitespace_unwanted(&mut self, unwanted: HtpUnwanted) {
+ self.requestline_leading_whitespace_unwanted = unwanted;
+ }
+
+ /// Configures whether request data is decompressed.
+ pub fn set_request_decompression(&mut self, set: bool) {
+ self.request_decompression_enabled = set;
+ }
+
+ /// Configures how many layers of compression we try to decompress.
+ pub fn set_decompression_layer_limit(&mut self, limit: Option) {
+ self.compression_options.set_layer_limit(limit);
+ }
+}
diff --git a/rust/htp/src/connection.rs b/rust/htp/src/connection.rs
new file mode 100644
index 000000000000..0c81e10b8a34
--- /dev/null
+++ b/rust/htp/src/connection.rs
@@ -0,0 +1,135 @@
+use crate::log::{Log, Message};
+use std::{
+ net::IpAddr,
+ sync::mpsc::{channel, Receiver, Sender},
+ time::SystemTime,
+};
+use time::OffsetDateTime;
+
+/// Exported (`#[repr(C)]`) unit type that namespaces the `Connection` flag constants.
+#[repr(C)]
+pub struct ConnectionFlags;
+
+/// `Connection` flag values, each a distinct `u8` bit.
+impl ConnectionFlags {
+ /// Default value: no flags raised.
+ pub const UNKNOWN: u8 = 0x00;
+ /// Pipelined requests have been seen.
+ pub const PIPELINED: u8 = 0x01;
+ /// Extra data has been seen after an HTTP 0.9 communication.
+ pub const HTTP_0_9_EXTRA: u8 = 0x02;
+}
+
+/// Stores information about the session: endpoints, log channel, flags, timestamps and byte counters.
+pub struct Connection {
+ /// Client IP address, if set.
+ pub client_addr: Option,
+ /// Client port, if set.
+ pub client_port: Option,
+ /// Server IP address, if set.
+ pub server_addr: Option,
+ /// Server port, if set.
+ pub server_port: Option,
+
+ /// Channel (sender half, receiver half) carrying the log messages associated with this connection.
+ log_channel: (Sender, Receiver),
+
+ /// Parsing flags.
+ pub flags: u8,
+ /// When this connection was opened; `open` overwrites it only when given a timestamp.
+ pub open_timestamp: OffsetDateTime,
+ /// When this connection was closed; `close` overwrites it only when given a timestamp.
+ pub close_timestamp: OffsetDateTime,
+ /// Inbound data counter, increased by `track_inbound_data`.
+ pub request_data_counter: u64,
+ /// Outbound data counter, increased by `track_outbound_data`.
+ pub response_data_counter: u64,
+}
+
+impl Default for Connection {
+ /// Returns a `Connection` with no endpoints, a fresh log channel, zeroed flags and counters, and both timestamps set to the current system time.
+ fn default() -> Self {
+ Self {
+ client_addr: None,
+ client_port: None,
+ server_addr: None,
+ server_port: None,
+ log_channel: channel(),
+ flags: 0,
+ open_timestamp: OffsetDateTime::from(SystemTime::now()),
+ close_timestamp: OffsetDateTime::from(SystemTime::now()),
+ request_data_counter: 0,
+ response_data_counter: 0,
+ }
+ }
+}
+
+impl Connection {
+ /// Opens a connection. This only stores the provided endpoint data for future reference;
+ /// the open timestamp is replaced only when `timestamp` is `Some`.
+ pub fn open(
+ &mut self, client_addr: Option, client_port: Option,
+ server_addr: Option, server_port: Option, timestamp: Option,
+ ) {
+ self.client_addr = client_addr;
+ self.client_port = client_port;
+ self.server_addr = server_addr;
+ self.server_port = server_port;
+
+ // Remember when the connection was opened; keep the existing value if none was supplied.
+ if let Some(timestamp) = timestamp {
+ self.open_timestamp = timestamp;
+ }
+ }
+
+ /// Closes the connection by recording the close timestamp when one is supplied.
+ pub fn close(&mut self, timestamp: Option) {
+ // Update the timestamp; keep the existing value if none was supplied.
+ if let Some(timestamp) = timestamp {
+ self.close_timestamp = timestamp;
+ }
+ }
+
+ /// Adds `len` bytes to the inbound data counter, wrapping around on `u64` overflow.
+ pub fn track_inbound_data(&mut self, len: usize) {
+ self.request_data_counter = (self.request_data_counter).wrapping_add(len as u64);
+ }
+
+ /// Adds `len` bytes to the outbound data counter, wrapping around on `u64` overflow.
+ pub fn track_outbound_data(&mut self, len: usize) {
+ self.response_data_counter = (self.response_data_counter).wrapping_add(len as u64);
+ }
+
+ /// Returns a reference to the sender half of the log channel.
+ pub fn get_sender(&self) -> &Sender {
+ &self.log_channel.0
+ }
+
+ /// Drains the log channel without blocking and returns every pending message wrapped in a `Log`.
+ pub fn get_logs(&self) -> Vec {
+ let mut logs = Vec::with_capacity(8);
+ while let Ok(message) = self.log_channel.1.try_recv() {
+ logs.push(Log::new(self, message))
+ }
+ logs
+ }
+
+ /// Returns the next pending message from the log channel as a `Log`, or `None` if `try_recv` fails.
+ pub fn get_next_log(&self) -> Option {
+ self.log_channel
+ .1
+ .try_recv()
+ .map(|message| Log::new(self, message))
+ .ok()
+ }
+}
+
+impl PartialEq for Connection {
+ /// Returns true when both connections have equal client/server addresses and ports; no other field is compared.
+ fn eq(&self, rhs: &Self) -> bool {
+ self.client_addr == rhs.client_addr
+ && self.client_port == rhs.client_port
+ && self.server_addr == rhs.server_addr
+ && self.server_port == rhs.server_port
+ }
+}
diff --git a/rust/htp/src/connection_parser.rs b/rust/htp/src/connection_parser.rs
new file mode 100644
index 000000000000..2f8908c34847
--- /dev/null
+++ b/rust/htp/src/connection_parser.rs
@@ -0,0 +1,981 @@
+use crate::{
+ bstr::Bstr,
+ config::Config,
+ connection::{Connection, ConnectionFlags},
+ decompressors::HtpContentEncoding,
+ error::Result,
+ hook::DataHook,
+ log::Logger,
+ transaction::{HtpRequestProgress, HtpResponseProgress, HtpTransferCoding, Transaction},
+ transactions::Transactions,
+ util::{FlagOperations, HtpFlags},
+ HtpStatus,
+};
+use std::{any::Any, borrow::Cow, cell::Cell, net::IpAddr, rc::Rc, time::SystemTime};
+use time::OffsetDateTime;
+
+/// Enumerates the parsing states; some variants are specific to one direction, as marked below.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum State {
+ /// Default state.
+ NONE,
+ /// State once a transaction is processed or about to be processed.
+ IDLE,
+ /// State for request/response line parsing.
+ LINE,
+ /// State for header parsing.
+ HEADERS,
+ /// State for finalizing chunked body data parsing.
+ BODY_CHUNKED_DATA_END,
+ /// State for parsing chunked body data.
+ BODY_CHUNKED_DATA,
+ /// State for parsing the chunk length.
+ BODY_CHUNKED_LENGTH,
+ /// State to determine encoding of body data.
+ BODY_DETERMINE,
+ /// State for finalizing one side of the transaction.
+ FINALIZE,
+ // The variants below are used by request_state only.
+ /// State for determining the request protocol.
+ PROTOCOL,
+ /// State to determine if there is a CONNECT request.
+ CONNECT_CHECK,
+ /// State to determine if inbound parsing needs to be suspended.
+ CONNECT_PROBE_DATA,
+ /// State to determine if inbound parsing can continue if it was suspended.
+ CONNECT_WAIT_RESPONSE,
+ /// State to process request body data.
+ BODY_IDENTITY,
+ /// State to consume remaining data in request buffer for the HTTP 0.9 case.
+ IGNORE_DATA_AFTER_HTTP_0_9,
+ // The variants below are used by response_state only.
+ /// State to consume the remaining response body data when content-length is unknown.
+ BODY_IDENTITY_STREAM_CLOSE,
+ /// State to consume response body data when content-length is known.
+ BODY_IDENTITY_CL_KNOWN,
+}
+
+/// Enumerates all stream states. Each connection has two streams, one
+/// inbound and one outbound, and their states are tracked separately.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpStreamState {
+ /// Default stream state.
+ NEW,
+ /// State when the connection is open.
+ OPEN,
+ /// State when the connection is closed.
+ CLOSED,
+ /// State when the stream has produced a fatal error.
+ ERROR,
+ /// State for a tunnelled stream.
+ TUNNEL,
+ /// State when parsing is suspended and data is not consumed in order. This is to
+ /// allow processing on the other stream.
+ DATA_OTHER,
+ /// State when we should stop parsing the associated connection.
+ STOP,
+ /// State when all current data in the stream has been processed.
+ DATA,
+}
+
+#[derive(Debug, Default, Clone)]
+/// This structure is used to pass data (for example request and response
+/// body buffers, or gaps) to parsers, together with the current parse offsets.
+pub struct ParserData<'a> {
+ /// The data buffer, if any.
+ data: Option>,
+ /// Length of the data gap. Only set if this chunk is a gap.
+ gap_len: Option,
+ /// Current position offset of the data to parse; a `Cell` so it can be moved through `&self`.
+ position: Cell,
+ /// Offset up to which data has already been handed out by `callback_data`.
+ callback_position: usize,
+}
+
+impl<'a> ParserData<'a> {
+ /// Returns a raw pointer to the start of the unconsumed data (see `data`),
+ /// or a null pointer when `data` yields `None`.
+ pub fn data_ptr(&self) -> *const u8 {
+ self.data()
+ .as_ref()
+ .map(|data| data.as_ptr())
+ .unwrap_or(std::ptr::null())
+ }
+
+ /// Returns the unconsumed data, or `None` if there is no buffer or the position is past its end.
+ pub fn data(&self) -> Option<&[u8]> {
+ let data = self.data.as_ref()?;
+ if self.position.get() <= data.len() {
+ Some(&data[self.position.get()..])
+ } else {
+ None
+ }
+ }
+
+ /// Returns the unconsumed length: the remaining gap length for a gap, otherwise the remaining buffer length.
+ pub fn len(&self) -> usize {
+ if let Some(gap_len) = self.gap_len {
+ if self.position.get() >= gap_len {
+ 0
+ } else {
+ gap_len - self.position.get()
+ }
+ } else {
+ self.as_slice().len()
+ }
+ }
+
+ /// Returns how much data has been consumed so far (the current parse position).
+ fn consumed_len(&self) -> usize {
+ self.position.get()
+ }
+
+ /// Returns the unconsumed data as a slice; empty if there is no buffer or the position is past its end.
+ pub fn as_slice(&self) -> &[u8] {
+ if let Some(data) = self.data.as_ref() {
+ if self.position.get() <= data.len() {
+ return &data[self.position.get()..];
+ }
+ }
+ b""
+ }
+
+ /// Returns true when this chunk is a gap, i.e. a gap length is set.
+ pub fn is_gap(&self) -> bool {
+ self.gap_len.is_some()
+ }
+
+ /// Returns true when there is no more data to consume (`len()` is zero).
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Sets the position offset into the data for parsing; takes `&self` because the position is a `Cell`.
+ fn set_position(&self, position: usize) {
+ self.position.set(position);
+ }
+
+ /// Advances the parse position by `consumed`. No bounds check is made against the buffer length here.
+ pub fn consume(&self, consumed: usize) {
+ self.set_position(self.position.get() + consumed);
+ }
+
+ /// Moves the parse position back by `unconsume`, clamping at zero.
+ fn unconsume(&self, unconsume: usize) {
+ if unconsume < self.position.get() {
+ self.set_position(self.position.get() - unconsume);
+ } else {
+ self.set_position(0);
+ }
+ }
+
+ /// Converts this into a `ParserData<'static>` that owns its buffer; gap length and both offsets are kept.
+ pub fn into_owned(self) -> ParserData<'static> {
+ ParserData {
+ data: self.data.map(|d| Cow::Owned(d.into_owned())),
+ gap_len: self.gap_len,
+ position: self.position,
+ callback_position: self.callback_position,
+ }
+ }
+
+ /// Callback data is raw data buffer content that is passed to the
+ /// application via the header and trailer data hooks.
+ ///
+ /// Returns the bytes between the callback position and the parse position, then moves the callback
+ /// position up to the parse position. Returns an empty slice if there is no buffer or the range is invalid.
+ pub fn callback_data(&mut self) -> &[u8] {
+ if let Some(data) = self.data.as_ref() {
+ if self.position.get() <= data.len() && self.callback_position <= self.position.get() {
+ let d = &data[self.callback_position..self.position.get()];
+ self.callback_position = self.position.get();
+ return d;
+ }
+ }
+ b""
+ }
+
+ /// Sets the callback start location to the current parsing location, skipping any data not yet returned.
+ pub fn reset_callback_start(&mut self) {
+ self.callback_position = self.position.get();
+ }
+}
+
+impl<'a> From