diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 3540b600..d11cc549 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -7,46 +7,76 @@ on:
   push:
   pull_request:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 permissions:
   contents: read
 
 env:
+  GCC_VERSION: "11"
+  LLVM_VERSION: "17"
   COMMON_CMAKE_FLAGS: |
     -DSLEEF_SHOW_CONFIG=1
-    -DDISABLE_SSL=ON
     -DBUILD_GNUABI_LIBS=ON
     -DBUILD_INLINE_HEADERS=ON
     -DBUILD_DFT=ON
     -DBUILD_QUAD=ON
     -DBUILD_SCALAR_LIB=ON
     -DBUILD_STATIC_TEST_BINS=ON
+    -DENFORCE_TESTER=ON
+    -DENFORCE_TESTER3=ON
 
 jobs:
   build-native:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [gcc, llvm]
+
+    name: build-native-${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4.1.1
        with:
          persist-credentials: false

      - name: Install dependencies
-        run: sudo apt-get update -y -qq && sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev
+        run: |
+          sudo apt-get update -y -qq
+          sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev
+
+      # Needed for llvm builds as well, to provide the target libraries
+      - name: Install gcc
+        run: |
+          sudo apt-get install -y -qq gcc-${GCC_VERSION}
+
+      - name: Install llvm
+        run: |
+          curl -o llvm.sh https://apt.llvm.org/llvm.sh
+          chmod u+x llvm.sh
+          sudo ./llvm.sh ${LLVM_VERSION}
+          sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang
+          rm llvm.sh
+        if: ${{ matrix.compiler == 'llvm' }}

      - name: Build native
+        shell: bash -ex -o pipefail {0}
        run: |
-          set -x
          EXTRA_CMAKE_FLAGS="-DENFORCE_SSE2=ON -DENFORCE_SSE4=ON -DENFORCE_AVX=ON -DENFORCE_AVX2=ON -DENFORCE_AVX512F=ON -DENFORCE_FMA4=ON"
          cmake -S . -B _build-native -GNinja \
            -DCMAKE_INSTALL_PREFIX=$(pwd)/_install-native \
+            -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-native-${{ matrix.compiler }}.cmake \
            ${COMMON_CMAKE_FLAGS} \
            ${EXTRA_CMAKE_FLAGS}
          cmake --build _build-native
          cmake --install _build-native

-      - name: Upload build-native artifacts
+      - name: Upload build-native-${{ matrix.compiler }} artifacts
        uses: actions/upload-artifact@v3
        with:
-          name: build-native
+          name: build-native-${{ matrix.compiler }}
          path: |
            _build-*
            _install-*
@@ -55,6 +85,12 @@ jobs:
  test-native:
    runs-on: ubuntu-latest
    needs: [build-native]
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [gcc, llvm]
+
+    name: test-native-${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4.1.1
        with:
          persist-credentials: false
@@ -67,12 +103,12 @@ jobs:
        run: |
          cat /proc/cpuinfo

-      - name: Download build-native artifacts
+      - name: Download build-native-${{ matrix.compiler }} artifacts
        uses: actions/download-artifact@v3
        with:
-          name: build-native
+          name: build-native-${{ matrix.compiler }}

-      - name: Fix build-native permissions
+      - name: Fix _build-native permissions
        run: |
          chmod +x _build-native/bin/*

@@ -97,19 +133,24 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
+        arch: [aarch64, armhf, ppc64el, s390x, riscv64]
+        compiler: [gcc, llvm]
        include:
-          # AArch64
-          - arch: aarch64
-          # Aarch32
          - arch: armhf
-            package: -arm-linux-gnueabihf
-          # PPC64
+            binfmt: arm
+            gnupkg: -arm-linux-gnueabihf
          - arch: ppc64el
-            package: -powerpc64le-linux-gnu
-          # IBM Z
-          - arch: s390x
-
-    name: build-${{ matrix.arch }}
+            binfmt: ppc64le
+            gnupkg: -powerpc64le-linux-gnu
+          - arch: aarch64
+            debarch: arm64
+        exclude:
+          # Only GCC trunk supports the RISC-V V intrinsics, and
+          # https://github.com/riscv-collab/riscv-gnu-toolchain doesn't track a recent enough version yet
+          - arch: riscv64
+            compiler: gcc
+
+    name: build-${{ matrix.arch }}-${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4.1.1
        with:
          persist-credentials: false
@@ -118,20 +159,58 @@ jobs:
      - name: Install dependencies
        run: |
          sudo apt-get update -y -qq
-          sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.package || format('-{0}-linux-gnu', matrix.arch) }}
+          sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev debootstrap
+
+      # Needed for llvm builds as well, to provide the target libraries
+      - name: Install gcc
+        run: |
+          sudo apt-get install -y -qq gcc-${GCC_VERSION}${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }}

-      - name: Download build-native artifacts
+      - name: Install llvm
+        run: |
+          curl -o llvm.sh https://apt.llvm.org/llvm.sh
+          chmod u+x llvm.sh
+          sudo ./llvm.sh ${LLVM_VERSION}
+          sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang
+          rm llvm.sh
+        if: ${{ matrix.compiler == 'llvm' }}
+
+      - name: Setup QEMU
+        uses: docker/setup-qemu-action@v3.0.0
+        with:
+          platforms: ${{ matrix.binfmt || matrix.arch }}
+
+      - name: Check sysroot cache
+        id: check-sysroot-cache
+        uses: actions/cache@v3
+        with:
+          path: sysroot
+          key: sysroot-${{ matrix.arch }}-${{ hashFiles('./.github/workflows/build_and_test.yml') }}
+
+      - name: Create sysroot
+        run: |
+          sudo debootstrap --arch=${{ matrix.debarch || matrix.arch }} --verbose --include=fakeroot,symlinks,libmpfr-dev,libssl-dev --resolve-deps --variant=minbase --components=main,universe focal sysroot
+          # Remove unused files to minimize cache
+          sudo chroot sysroot symlinks -cr .
+          sudo chown ${USER} -R sysroot
+          rm -rf sysroot/{dev,proc,run,sys,var}
+          rm -rf sysroot/usr/{sbin,bin,share}
+          rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd}
+          rm -rf sysroot/usr/libexec/gcc
+        if: steps.check-sysroot-cache.outputs.cache-hit != 'true'
+
+      - name: Download build-native-${{ matrix.compiler }} artifacts
        uses: actions/download-artifact@v3
        with:
-          name: build-native
+          name: build-native-${{ matrix.compiler }}

-      - name: Fix build-native permissions
+      - name: Fix _build-native permissions
        run: |
          chmod +x _build-native/bin/*

      - name: Build ${{ matrix.arch }}
+        shell: bash -ex -o pipefail {0}
        run: |
-          set -x
          EXTRA_CMAKE_FLAGS=""
          if [[ ${{ matrix.arch }} = "aarch64" ]]; then
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_SVE=ON"
@@ -142,22 +221,32 @@ jobs:
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VSX=ON -DENFORCE_VSX3=ON"
          elif [[ ${{ matrix.arch }} = "s390x" ]]; then
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON"
-            # Disable VXE2 support, QEMU doesn't support it
+            # Disable VXE2 support; QEMU doesn't support some instructions generated by gcc or llvm
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON"
+          elif [[ ${{ matrix.arch }} = "riscv64" ]]; then
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON"
+            # Disable inline headers; they just don't compile on riscv64
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_INLINE_HEADERS=OFF"
+            # Disable dft; it fails with a linker error on `cexp`
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_DFT=OFF"
+            # Disable quad; it's missing the `Sleef_quad` function
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_QUAD=OFF"
          fi
+
          cmake -S . \
-B _build-${{ matrix.arch }} -GNinja \ -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \ - -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}.cmake \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}-${{ matrix.compiler }}.cmake \ + -DCMAKE_SYSROOT=$(pwd)/sysroot \ -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \ ${COMMON_CMAKE_FLAGS} \ ${EXTRA_CMAKE_FLAGS} cmake --build _build-${{ matrix.arch }} cmake --install _build-${{ matrix.arch }} - - name: Upload build-${{ matrix.arch }} artifacts + - name: Upload build-${{ matrix.arch }}-${{ matrix.compiler }} artifacts uses: actions/upload-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} path: | _build-${{ matrix.arch }} _install-${{ matrix.arch }} @@ -172,32 +261,88 @@ jobs: include: # AArch64 - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=off" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve128=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve256=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve512=on" + # Some tests fail when compiled with LLVM only + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=off" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve128=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve256=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve512=on" # Aarch32 - arch: armhf + compiler: gcc + binfmt: arm + qemu_cpu: "max" + - arch: armhf + compiler: llvm binfmt: arm qemu_cpu: "max" # PPC64 - arch: ppc64el + compiler: gcc + binfmt: ppc64le + qemu_cpu: "power10" + - arch: ppc64el + compiler: llvm binfmt: ppc64le qemu_cpu: "power10" # IBM Z # TODO: figure out qemu_cpu variable to make tests pass on QEMU - arch: s390x - - name: "test-${{ matrix.arch }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" + compiler: gcc + - arch: s390x + compiler: llvm + # RISC-V + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" + + name: "test-${{ matrix.arch }}-${{ matrix.compiler }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" steps: - uses: actions/checkout@v4.1.1 with: persist-credentials: false - - uses: docker/setup-qemu-action@v3.0.0 + - name: Setup QEMU + uses: docker/setup-qemu-action@v3.0.0 with: platforms: ${{ matrix.binfmt || matrix.arch }} @@ -208,20 +353,19 @@ jobs: run: | cat /proc/cpuinfo - - name: Download build-native artifacts + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Download build-${{ matrix.arch }} artifacts + - name: Download build-${{ matrix.arch 
}}-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} - - name: Fix build-native and _build-${{ matrix.arch }} permissions + - name: Fix _build-native and _build-${{ matrix.arch }} permissions run: | - chmod +x _build-native/bin/* - chmod +x _build-${{ matrix.arch }}/bin/* + chmod +x _build-native/bin/* _build-${{ matrix.arch }}/bin/* - name: Test ${{ matrix.arch }} env: @@ -233,10 +377,10 @@ jobs: cd _build-${{ matrix.arch }} ctest -j$(nproc) - - name: Upload test-${{ matrix.arch }}-${{ strategy.job-index }} artifacts + - name: Upload test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} artifacts uses: actions/upload-artifact@v3 with: - name: test-${{ matrix.arch }}-${{ strategy.job-index }} + name: test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} path: | _build-${{ matrix.arch }}/Testing if: always() diff --git a/CMakeLists.txt b/CMakeLists.txt index ec9e04e3..c38219f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,7 @@ set(SLEEF_ALL_SUPPORTED_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -56,6 +57,7 @@ set(SLEEF_SUPPORTED_LIBM_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -85,6 +87,10 @@ set(COSTOVERRIDE_NEON32 2) set(COSTOVERRIDE_NEON32VFPV4 2) set(COSTOVERRIDE_SVE 10) set(COSTOVERRIDE_SVENOFMA 10) +set(COSTOVERRIDE_RVVM1 10) +set(COSTOVERRIDE_RVVM1NOFMA 10) +set(COSTOVERRIDE_RVVM2 20) +set(COSTOVERRIDE_RVVM2NOFMA 20) # diff --git a/Configure.cmake b/Configure.cmake index 63d2c638..58fe5403 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -118,7 +118,10 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") + set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.") endif() set(COMPILER_SUPPORTS_PUREC_SCALAR 1) @@ -163,6 +166,11 @@ set(CLANG_FLAGS_ENABLE_VXE "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_VXENOFMA "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector") +# RISC-V +set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM1NOFMA "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2NOFMA "-march=rv64gcv_zba_zbb_zbs") set(FLAGS_OTHERS "") @@ -616,6 +624,50 @@ if (ENFORCE_VXE2 AND NOT COMPILER_SUPPORTS_VXE2) message(FATAL_ERROR "ENFORCE_VXE2 is specified and that feature is disabled or not supported by the compiler") endif() +# RVVM1 + +option(DISABLE_RVVM1 "Disable RVVM1" OFF) +option(ENFORCE_RVVM1 "Build fails if RVVM1 is not supported by the compiler" OFF) + +if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1) + string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS 
"${FLAGS_ENABLE_RVVM1}") + CHECK_C_SOURCE_COMPILES(" + #include + int main() { + vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_vlenb() * 8 / 32); }" + COMPILER_SUPPORTS_RVVM1) + + if(COMPILER_SUPPORTS_RVVM1) + set(COMPILER_SUPPORTS_RVVM1NOFMA 1) + endif() +endif() + +if (ENFORCE_RVVM1 AND NOT COMPILER_SUPPORTS_RVVM1) + message(FATAL_ERROR "ENFORCE_RVVM1 is specified and that feature is disabled or not supported by the compiler") +endif() + +# RVVM2 + +option(DISABLE_RVVM2 "Disable RVVM2" OFF) +option(ENFORCE_RVVM2 "Build fails if RVVM2 is not supported by the compiler" OFF) + +if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2) + string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM2}") + CHECK_C_SOURCE_COMPILES(" + #include + int main() { + vint32m2_t r = __riscv_vmv_v_x_i32m2(1, 2 * __riscv_vlenb() * 8 / 32); }" + COMPILER_SUPPORTS_RVVM2) + + if(COMPILER_SUPPORTS_RVVM2) + set(COMPILER_SUPPORTS_RVVM2NOFMA 1) + endif() +endif() + +if (ENFORCE_RVVM2 AND NOT COMPILER_SUPPORTS_RVVM2) + message(FATAL_ERROR "ENFORCE_RVVM2 is specified and that feature is disabled or not supported by the compiler") +endif() + # CUDA option(ENFORCE_CUDA "Build fails if CUDA is not supported" OFF) diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index d8b9c845..fb83b84c 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -54,7 +54,7 @@ #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP -#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3 +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || defined(__riscv) || CONFIG == 3 #ifndef FP_FAST_FMA //@#ifndef FP_FAST_FMA #define FP_FAST_FMA diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h new file mode 100644 index 00000000..8450e51a --- /dev/null +++ b/src/arch/helperrvv.h @@ -0,0 +1,1067 @@ +#ifndef HELPERRVV_H +#define HELPERRVV_H + +#if !defined(SLEEF_GENHEADER) +#include +#include "misc.h" + +#if defined(VECTLENDP) || defined(VECTLENSP) +#error VECTLENDP or VECTLENSP already defined +#endif +#endif // #if !defined(SLEEF_GENHEADER) + +#if CONFIG == 1 || CONFIG == 2 +#define ISANAME "RISC-V Vector Extension with Min. 
VLEN" +#define SLEEF_RVV_VLEN __riscv_vlenb() +#elif CONFIG == 7 +// 128-bit vector length +#define ISANAME "RISC-V Vector Extension 128-bit" +#define SLEEF_RVV_VLEN ((1 << 7) / 8) +#elif CONFIG == 8 +// 256-bit vector length +#define ISANAME "RISC-V Vector Extension 256-bit" +#define SLEEF_RVV_VLEN ((1 << 8) / 8) +#elif CONFIG == 9 +// 512-bit vector length +#define ISANAME "RISC-V Vector Extension 512-bit" +#define SLEEF_RVV_VLEN ((1 << 9) / 8) +#elif CONFIG == 10 +// 1024-bit vector length +#define ISANAME "RISC-V Vector Extension 1024-bit" +#define SLEEF_RVV_VLEN ((1 << 10) / 8) +#elif CONFIG == 11 +// 2048-bit vector length +#define ISANAME "RISC-V Vector Extension 2048-bit" +#define SLEEF_RVV_VLEN ((1 << 11) / 8) +#else +#error CONFIG macro invalid or not defined +#endif + +#define ENABLE_SP +#define ENABLE_DP + +#if CONFIG != 2 +#define ENABLE_FMA_SP +#define ENABLE_FMA_DP +#endif + +static INLINE int vavailability_i(int name) { return -1; } + +//////////////////////////////////////////////////////////////////////////////// +// RISC-V Vector Types +//////////////////////////////////////////////////////////////////////////////// + +// About the RISC-V Vector type translations: +// +// Because the single- and double-precision versions of the RVV port have +// conflicting definitions of the vmask and vopmask types, they can only +// be defined for at most one precision level in a single translation unit. +// Any functions that use vmask or vopmask types are thus enabled only by the +// corresponding ENABLE_RVV_SP or ENABLE_RVV_DP macro guards. +#if defined(ENABLE_RVV_SP) && defined(ENABLE_RVV_DP) +#error Cannot simultaneously define ENABLE_RVV_SP and ENABLE_RVV_DP +#endif + +#ifdef ENABLE_RVV_SP +// Types that conflict with ENABLE_RVV_DP +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) +typedef vuint64m2_t vmask; +typedef vbool32_t vopmask; +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) +typedef vuint64m4_t vmask; +typedef vbool16_t vopmask; +#else +#error "unknown rvv lmul" +#endif +#endif + +#ifdef ENABLE_RVV_DP +// Types that conflict with ENABLE_RVV_SP +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) +typedef vuint64m1_t vmask; +typedef vbool64_t vopmask; +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) +typedef vuint64m2_t vmask; +typedef vbool32_t vopmask; +#else +#error "unknown rvv lmul" +#endif +#endif + +// LMUL-Dependent Type & Macro Definitions: +// +// Some SLEEF types are multi-value structs. RVV vectors have unknown length at +// compile time, so they cannote appear in a struct in Clang. They are instead +// represented as single vectors with "members" packed into the registers of a +// wide-LMUL register group. In the largest cases (ddi_t and ddf_t), this +// requires LMUL=8 if the base type (vfloat or vdouble) has LMUL=2, meaning +// LMUL=2 is currently the widest option for SLEEF function argument types. 
+#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + +typedef vint32mf2_t vint; +typedef vfloat64m1_t vdouble; +typedef vfloat64m2_t vdouble2; +typedef vfloat64m4_t vdouble3; +typedef vfloat64m4_t dd2; +typedef vuint64m2_t vquad; +typedef vint32m2_t di_t; +typedef vint32m4_t ddi_t; +typedef vfloat32m1_t vfloat; +typedef vfloat32m2_t vfloat2; +typedef vfloat32m4_t df2; +typedef vint32m1_t vint2; +typedef vint32m2_t fi_t; +typedef vint32m4_t dfi_t; +#define SLEEF_RVV_SP_LMUL 1 +#define SLEEF_RVV_DP_LMUL 1 +#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float)) +#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double)) +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m1 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m1 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m2 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m1 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m1 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m1 +#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m1_rm +#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m1 +#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m1_rm +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m1 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32mf2 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ + __riscv_vreinterpret_v_i64m2_i32m2(__riscv_vreinterpret_i64m2(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ + __riscv_vreinterpret_f64m2(__riscv_vreinterpret_v_i32m2_i64m2(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m4 +#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 
+#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m1 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m1 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2 +#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32mf2_rm +#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m1 +#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m1_rm + +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + +typedef vint32m1_t vint; +typedef vfloat64m2_t vdouble; +typedef vfloat64m4_t vdouble2; +typedef vfloat64m8_t vdouble3; +typedef vfloat64m8_t dd2; +typedef vuint64m4_t vquad; +typedef vint32m4_t di_t; +typedef vint32m8_t ddi_t; +typedef vfloat32m2_t vfloat; +typedef vfloat32m4_t vfloat2; +typedef vfloat32m8_t df2; +typedef vint32m2_t vint2; +typedef vint32m4_t fi_t; +typedef vint32m8_t dfi_t; +#define SLEEF_RVV_SP_LMUL 2 +#define SLEEF_RVV_DP_LMUL 2 +#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float)) +#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double)) +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m2 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m2 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m4 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m8 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m4 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m2 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m2 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m2 +#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m2_rm +#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m2 +#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m2_rm +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m2 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I 
__riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m4 +#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m8 +#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ + __riscv_vreinterpret_f64m8(__riscv_vreinterpret_v_i32m8_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + __riscv_vreinterpret_v_i64m8_i32m8(__riscv_vreinterpret_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m2 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m8 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1 +#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32m1_rm +#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m2 +#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m2_rm + +#else +#error "unknown rvv lmul" +#endif // ENABLE_RVVM1 + +//////////////////////////////////////////////////////////////////////////////// +// Single-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// fi type +static INLINE vfloat figetd_vf_di(fi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF(SLEEF_RVV_SP_VGET_VI2(d, 0)); +} +static INLINE vint2 figeti_vi2_di(fi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 1); +} +static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { + fi_t res; + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); + res = __riscv_vset(res, 1, i); + return res; +} +static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF2(SLEEF_RVV_SP_VGET_2VI(d, 0)); +} +static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 2); +} +static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { + dfi_t res; + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + res = __riscv_vset(res, 2, i); + return res; +} +static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { + return __riscv_vset(dfi, 0, 
SLEEF_RVV_SP_VREINTERPRET_2VI(v)); +} +// vfloat2 type +static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 0); +} +static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 1); +} +static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { + vfloat2 res; + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + return res; +} +static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { + return __riscv_vset(v, 0, d); +} +static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { + return __riscv_vset(v, 1, d); +} +// df2 type +static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { + df2 res; + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); + return res; +} +static vfloat2 df2geta_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 0); } +static vfloat2 df2getb_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 1); } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(vf); +} +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { + return SLEEF_RVV_SP_VREINTERPRET_VF(vi); +} + + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vfloat vcast_vf_f(float f) { + return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP); +} +static INLINE vfloat vrint_vf_vf(vfloat vd) { + return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_FRM_RNE, VECTLENSP), VECTLENSP); +} +static INLINE vfloat vcast_vf_vi2(vint2 vi) { + return __riscv_vfcvt_f(vi, VECTLENSP); +} +static INLINE vint2 vcast_vi2_i(int i) { + return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP); +} +static INLINE vint2 vrint_vi2_vf(vfloat vf) { + return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_FRM_RNE, VECTLENSP); +} +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { + return __riscv_vfcvt_rtz_x(vf, VECTLENSP); +} +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { + return vcast_vf_vi2(vtruncate_vi2_vf(vf)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vfloat vload_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE vfloat vloadu_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { + __riscv_vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { + __riscv_vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { + __riscv_vse32(ptr, v, VECTLENSP); +} +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + return __riscv_vluxei32(ptr, __riscv_vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfadd(x, y, VECTLENSP); +} +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfsub(x, y, VECTLENSP); +} +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfmul(x, y, VECTLENSP); +} +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfdiv(x, y, VECTLENSP); +} +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfmax(x, y, VECTLENSP); +} +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfmin(x, y, VECTLENSP); +} 
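+// Editor's illustrative sketch (not part of the patch): unlike fixed-width
+// SIMD helpers, every wrapper here threads the active element count
+// (VECTLENSP) through the underlying intrinsic, so kernels compose like
+// plain scalar arithmetic. The helper name axpb_vf is hypothetical;
+// vcast_vf_f is defined above and vmla_vf_vf_vf_vf just below.
+//
+//   static INLINE vfloat axpb_vf(vfloat a, vfloat x, float b) {
+//     return vmla_vf_vf_vf_vf(a, x, vcast_vf_f(b)); // a*x + b, whole group
+//   }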
+static INLINE vfloat vrec_vf_vf(vfloat d) { + return __riscv_vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); +} +static INLINE vfloat vsqrt_vf_vf(vfloat d) { + return __riscv_vfsqrt(d, VECTLENSP); +} +#if defined(ENABLE_FMA_SP) +// Multiply accumulate: z = z + x * y +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return __riscv_vfmadd(x, y, z, VECTLENSP); +} +// Multiply subtract: z = z - x * y +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return __riscv_vfnmsub(x, y, z, VECTLENSP); +} +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return __riscv_vfmsub(x, y, z, VECTLENSP); +} +#else +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#endif +// fused multiply add / sub +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) + z + return __riscv_vfmadd(x, y, z, VECTLENSP); +} +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // -(x * y) + z + return __riscv_vfnmsub(x, y, z, VECTLENSP); +} +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) - z + return __riscv_vfmsub(x, y, z, VECTLENSP); +} +// sign manipulation +static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfsgnjx(x, y, VECTLENSP); +} +static INLINE vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfsgnj(x, y, VECTLENSP); +} +static INLINE vfloat vsign_vf_vf(vfloat f) { + return __riscv_vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); +} +static INLINE vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { + vint2 xi = SLEEF_RVV_SP_VREINTERPRET_VI2(x); + vint2 yi = SLEEF_RVV_SP_VREINTERPRET_VI2(y); + vint2 xioryi = __riscv_vor(xi, yi, VECTLENSP); + vfloat xory = SLEEF_RVV_SP_VREINTERPRET_VF(xioryi); + return __riscv_vfsgnj(x, xory, VECTLENSP); +} +static INLINE vfloat vabs_vf_vf(vfloat f) { + return __riscv_vfabs(f, VECTLENSP); +} +static INLINE vfloat vneg_vf_vf(vfloat f) { + return __riscv_vfneg(f, VECTLENSP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vadd(x, y, VECTLENSP); +} +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vsub(x, y, VECTLENSP); +} +static INLINE vint2 vneg_vi2_vi2(vint2 x) { + return __riscv_vneg(x, VECTLENSP); +} +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vand(x, y, VECTLENSP); +} +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vand(__riscv_vnot(x, VECTLENSP), y, VECTLENSP); +} +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vor(x, y, VECTLENSP); +} +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vxor(x, y, VECTLENSP); +} +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { + return __riscv_vsll(x, c, VECTLENSP); +} +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { + return __riscv_vsra(x, c, VECTLENSP); +} +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(__riscv_vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); +} + +#ifdef ENABLE_RVV_SP 
+/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { + return SLEEF_RVV_SP_VREINTERPRET_VF(__riscv_vncvt_x(vm, VECTLENSP)); +} +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { + return __riscv_vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); +} +static INLINE int vtestallones_i_vo32(vopmask g) { + return __riscv_vcpop(g, VECTLENSP) == VECTLENSP; +} +static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { + return SLEEF_RVV_SP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENSP); +} +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return __riscv_vand(x, y, VECTLENSP); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return __riscv_vor(x, y, VECTLENSP); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return __riscv_vxor(x, y, VECTLENSP); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return __riscv_vand(SLEEF_RVV_SP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); +} +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { + return __riscv_vmerge(y, -1, x, VECTLENSP); +} +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); +} +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { + return __riscv_vmerge(y, 0, x, VECTLENSP); +} + + +/****************************************/ +/* Logical Mask Operations */ +/****************************************/ +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmand(x, y, VECTLENSP); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmandn(y, x, VECTLENSP); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmor(x, y, VECTLENSP); +} +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmxor(x, y, VECTLENSP); +} +// single precision FP comparison +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfeq(x, y, VECTLENSP); +} +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfne(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfgt(x, y, VECTLENSP); +} +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfge(x, y, VECTLENSP); +} +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmflt(x, y, VECTLENSP); +} +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfle(x, y, VECTLENSP); +} +static INLINE vopmask visnan_vo_vf(vfloat d) { + return __riscv_vmfne(d, d, VECTLENSP); +} +static INLINE vopmask visinf_vo_vf(vfloat d) { + return __riscv_vmfeq(__riscv_vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); +} +static INLINE vopmask vispinf_vo_vf(vfloat d) { + return __riscv_vmfeq(d, SLEEF_INFINITYf, VECTLENSP); +} +// conditional select +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return __riscv_vmerge(y, x, mask, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_f_f(vopmask mask, float v1, float v0) { + return __riscv_vfmerge(vcast_vf_f(v0), v1, mask, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return __riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d2), d1, o1, VECTLENSP), d0, o0, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, 
float d1, float d2, float d3) { + return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d3), d2, o2, VECTLENSP), d1, o1, VECTLENSP), d0, o0, VECTLENSP); +} +// integer comparison +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vmseq(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vmsgt(x, y, VECTLENSP); +} +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 zero = vcast_vi2_i(0); + return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENSP), VECTLENSP); +} +// integer conditional select +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return __riscv_vmerge(y, x, m, VECTLENSP); +} +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); +} +#endif // ENABLE_RVV_SP + + +//////////////////////////////////////////////////////////////////////////////// +// Double-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// vdouble2 type +static INLINE const vdouble vd2getx_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 0); +} +static INLINE const vdouble vd2gety_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 1); +} +static INLINE const vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { + vdouble2 res; + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + return res; +} +static INLINE const vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { + return __riscv_vset(v, 0, d); +} +static INLINE const vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { + return __riscv_vset(v, 1, d); +} +// dd2 type +static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { + dd2 res; + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); + return res; +} +static vdouble2 dd2geta_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 0); } +static vdouble2 dd2getb_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 1); } +// vdouble3 type +static INLINE vdouble vd3getx_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 0); } +static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 1); } +static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 2); } +static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + vdouble3 res; + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + res = __riscv_vset(res, 2, z); + return res; +} +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 2, d); } +// di type +static INLINE vdouble digetd_vd_di(di_t d) { + return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); +} +static INLINE vint digeti_vi_di(di_t d) { +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + return SLEEF_RVV_DP_VGET_VI(d, 2); +#else +#error "unknown rvv lmul" +#endif +} +static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { + di_t res; + res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); +#if 
defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + res = __riscv_vset(res, 1, __riscv_vlmul_ext_i32m1(i)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + res = __riscv_vset(res, 2, i); +#else +#error "unknown rvv lmul" +#endif + return res; +} +// ddi type +static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { + return SLEEF_RVV_DP_VGET_VD2(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(d), 0); +} +static INLINE vint ddigeti_vi_ddi(ddi_t d) { +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + return SLEEF_RVV_DP_VGET_VI(d, 4); +#else +#error "unknown rvv lmul" +#endif +} +static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { + ddi_t res; + res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + res = __riscv_vset(res, 2, __riscv_vlmul_ext_i32m1(i)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + res = __riscv_vset(res, 4, i); +#else +#error "unknown rvv lmul" +#endif + return res; +} +static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { + return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); +} + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vdouble vcast_vd_d(double d) { + return SLEEF_RVV_DP_VCAST_VD_D(d, VECTLENDP); +} +static INLINE vdouble vcast_vd_vi(vint i) { + return SLEEF_RVV_DP_VCAST_VD_VI(i); +} +static INLINE vint vcast_vi_i(int32_t i) { + return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP); +} +static INLINE vint vrint_vi_vd(vdouble vd) { + return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_FRM_RNE, VECTLENDP); +} +static INLINE vdouble vrint_vd_vd(vdouble vd) { + return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_FRM_RNE, VECTLENDP), VECTLENDP); +} +static INLINE vint vtruncate_vi_vd(vdouble vd) { + return __riscv_vfncvt_rtz_x(vd, VECTLENDP); +} +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { + return vcast_vd_vi(vtruncate_vi_vd(vd)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vdouble vload_vd_p(const double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vdouble vloadu_vd_p(const double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vint vloadu_vi_p(int32_t *p) { + return SLEEF_RVV_DP_LOAD_VI(p, VECTLENDP); +} +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { + __riscv_vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { + __riscv_vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vi(int32_t *ptr, vint v) { + __riscv_vse32(ptr, v, VECTLENDP); +} +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + return __riscv_vluxei64(ptr, __riscv_vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), sizeof(double), VECTLENDP), VECTLENDP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfadd(x, y, VECTLENDP); +} +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsub(x, y, VECTLENDP); +} +static INLINE vdouble vrec_vd_vd(vdouble d) { + return __riscv_vfdiv(vcast_vd_d(1.0), d, 
VECTLENDP); +} +static INLINE vdouble vabs_vd_vd(vdouble d) { + return __riscv_vfabs(d, VECTLENDP); +} +static INLINE vdouble vsqrt_vd_vd(vdouble d) { + return __riscv_vfsqrt(d, VECTLENDP); +} +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfmul(x, y, VECTLENDP); +} +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfdiv(x, y, VECTLENDP); +} +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfmax(x, y, VECTLENDP); +} +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfmin(x, y, VECTLENDP); +} +#if defined(ENABLE_FMA_DP) +// Multiply accumulate: z = z + x * y +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmadd(x, y, z, VECTLENDP); +} +// Multiply subtract: z = z - x * y +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmsub(x, y, z, VECTLENDP); +} +#else +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +#endif +// fused multiply add / sub +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmadd(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfnmsub(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmsub(x, y, z, VECTLENDP); +} +// sign manipulation +static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsgnjx(x, y, VECTLENDP); +} +static INLINE vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsgnj(x, y, VECTLENDP); +} +static INLINE vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(__riscv_vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); +} +static INLINE vdouble vneg_vd_vd(vdouble d) { + return __riscv_vfneg(d, VECTLENDP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { + return __riscv_vadd(x, y, VECTLENDP); +} +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { + return __riscv_vsub(x, y, VECTLENDP); +} +static INLINE vint vneg_vi_vi(vint x) { + return __riscv_vneg(x, VECTLENDP); +} +static INLINE vint vand_vi_vi_vi(vint x, vint y) { + return __riscv_vand(x, y, VECTLENDP); +} +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { + return __riscv_vand(__riscv_vnot(x, VECTLENDP), y, VECTLENDP); +} +static INLINE vint vor_vi_vi_vi(vint x, vint y) { + return __riscv_vor(x, y, VECTLENDP); +} +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { + return __riscv_vxor(x, y, VECTLENDP); +} +static INLINE vint vsll_vi_vi_i(vint x, int c) { + return __riscv_vsll(x, c, VECTLENDP); +} +static INLINE vint vsra_vi_vi_i(vint x, int c) { + return __riscv_vsra(x, c, VECTLENDP); +} +static INLINE vint vsrl_vi_vi_i(vint x, int c) { + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); +} + + +#ifdef ENABLE_RVV_DP +/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vmask vcast_vm_i64(int64_t c) { + return 
SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP);
+}
+static INLINE vmask vcast_vm_u64(uint64_t c) {
+  return SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP);
+}
+static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) {
+  return SLEEF_RVV_DP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENDP);
+}
+static INLINE vmask vcast_vm_vi(vint vi) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP));
+}
+static INLINE vmask vcastu_vm_vi(vint vi) {
+  return __riscv_vsll(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP);
+}
+static INLINE vint vcastu_vi_vm(vmask vm) {
+  return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vnsrl(vm, 32, VECTLENDP));
+}
+static INLINE vint vcast_vi_vm(vmask vm) {
+  return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vncvt_x(vm, VECTLENDP));
+}
+static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) {
+  return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP);
+}
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vand(x, y, VECTLENDP);
+}
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vor(x, y, VECTLENDP);
+}
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vxor(x, y, VECTLENDP);
+}
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vand(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP);
+}
+static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) {
+  return __riscv_vmerge(y, 0, x, VECTLENDP);
+}
+static INLINE vmask vsll64_vm_vm_i(vmask mask, int64_t c) {
+  return __riscv_vsll(mask, c, VECTLENDP);
+}
+static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP));
+}
+static INLINE vmask vsrl64_vm_vm_i(vmask mask, int64_t c) {
+  return __riscv_vsrl(mask, c, VECTLENDP);
+}
+static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vadd(x, y, VECTLENDP);
+}
+static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) {
+  return __riscv_vmerge(y, -1, x, VECTLENDP);
+}
+static INLINE vmask vsel_vm_vo64_vm_vm(vopmask mask, vmask x, vmask y) {
+  return __riscv_vmerge(y, x, mask, VECTLENDP);
+}
+static INLINE vmask vneg64_vm_vm(vmask mask) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP));
+}
+static INLINE vdouble vreinterpret_vd_vm(vmask vm) {
+  return SLEEF_RVV_DP_VREINTERPRET_VD(vm);
+}
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(vd);
+}
+
+// vquad type
+static INLINE const vmask vqgetx_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 0); }
+static INLINE const vmask vqgety_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 1); }
+static INLINE vquad vqsetxy_vq_vm_vm(vmask x, vmask y) {
+  vquad res;
+  res = __riscv_vset(res, 0, x);
+  res = __riscv_vset(res, 1, y);
+  return res;
+}
+static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return __riscv_vset(v, 0, x); }
+static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return __riscv_vset(v, 1, y); }
+
+
+
+/****************************************/
+/* Logical Mask Operations */
+/****************************************/
+static INLINE vopmask vcast_vo64_vo32(vopmask vo) {
+  return vo;
+}
+static INLINE vopmask vcast_vo32_vo64(vopmask vo) {
+  return vo;
+}
+static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmand(x, y, VECTLENDP);
+}
+static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmandn(y, x, VECTLENDP);
+}
+static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmor(x, y, VECTLENDP);
+}
+static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmxor(x, y, VECTLENDP);
+}
+static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) {
+  return __riscv_vmseq(x, y, VECTLENDP);
+}
+static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
+  return __riscv_vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP);
+}
+// double-precision comparison
+static INLINE vopmask visinf_vo_vd(vdouble d) {
+  return __riscv_vmfeq(__riscv_vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP);
+}
+static INLINE vopmask vispinf_vo_vd(vdouble d) {
+  return __riscv_vmfeq(d, SLEEF_INFINITY, VECTLENDP);
+}
+static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfeq(x, y, VECTLENDP);
+}
+static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfne(x, y, VECTLENDP);
+}
+static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmflt(x, y, VECTLENDP);
+}
+static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfle(x, y, VECTLENDP);
+}
+static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfgt(x, y, VECTLENDP);
+}
+static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfge(x, y, VECTLENDP);
+}
+static INLINE vopmask visnan_vo_vd(vdouble d) {
+  return __riscv_vmfne(d, d, VECTLENDP);
+}
+// double-precision conditional select
+static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) {
+  return __riscv_vmerge(y, x, mask, VECTLENDP);
+}
+static INLINE vdouble vsel_vd_vo_d_d(vopmask mask, double v0, double v1) {
+  return __riscv_vfmerge(vcast_vd_d(v1), v0, mask, VECTLENDP);
+}
+static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
+  return __riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d2), d1, o1, VECTLENDP), d0, o0, VECTLENDP);
+}
+static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
+  return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d3), d2, o2, VECTLENDP), d1, o1, VECTLENDP), d0, o0, VECTLENDP);
+}
+static INLINE int vtestallones_i_vo64(vopmask g) {
+  return __riscv_vcpop(g, VECTLENDP) == VECTLENDP;
+}
+// integer comparison
+static INLINE vopmask veq_vo_vi_vi(vint x, vint y) {
+  return __riscv_vmseq(x, y, VECTLENDP);
+}
+static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) {
+  return __riscv_vmsgt(x, y, VECTLENDP);
+}
+static INLINE vint vgt_vi_vi_vi(vint x, vint y) {
+  vint zero = vcast_vi_i(0);
+  return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENDP), VECTLENDP);
+}
+// integer conditional select
+static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) {
+  return __riscv_vmerge(y, x, m, VECTLENDP);
+}
+static INLINE vint vandnot_vi_vo_vi(vopmask mask, vint vi) {
+  return __riscv_vmerge(vi, 0, mask, VECTLENDP);
+}
+static INLINE vint vand_vi_vo_vi(vopmask x, vint y) {
+  return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP);
+}
+#endif // ENABLE_RVV_DP
+
+#endif // HELPERRVV_H
diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h
index 2f1a0da9..274156fc 100644
--- a/src/common/commonfuncs.h
+++ b/src/common/commonfuncs.h
@@ -3,7 +3,7 @@
 // (See accompanying file LICENSE.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 
 typedef struct {
   vdouble x, y, z;
 } vdouble3;
 
@@ -210,14 +210,17 @@ static INLINE CONST VECTOR_CC vdouble vtoward0_vd_vd(vdouble x) { // returns nex
   return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t);
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) {
   return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y)));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) {
   return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d);
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) {
   return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y)));
 }
@@ -226,6 +229,7 @@ static INLINE CONST VECTOR_CC vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) {
   return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)),
                                           vand_vm_vm_vm   (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y))));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vdouble vtruncate2_vd_vd(vdouble x) {
 #ifdef FULL_FP_ROUNDING
diff --git a/src/common/dd.h b/src/common/dd.h
index b1423556..3431e42d 100644
--- a/src/common/dd.h
+++ b/src/common/dd.h
@@ -3,7 +3,7 @@
 // (See accompanying file LICENSE.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 #if !defined(ENABLE_CUDA)
 typedef struct {
   vdouble x, y;
diff --git a/src/common/df.h b/src/common/df.h
index 4e3e7949..a14c1c6a 100644
--- a/src/common/df.h
+++ b/src/common/df.h
@@ -3,7 +3,7 @@
 // (See accompanying file LICENSE.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 #if !defined(ENABLE_CUDA)
 typedef struct {
   vfloat x, y;
diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt
index 69f2ba6e..41d6f36f 100644
--- a/src/libm-tester/CMakeLists.txt
+++ b/src/libm-tester/CMakeLists.txt
@@ -27,6 +27,11 @@ set(TESTER3_DEFINITIONS_VXENOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SL
 set(TESTER3_DEFINITIONS_VXE2 ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2)
 set(TESTER3_DEFINITIONS_VXE2NOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2nofma)
 
+set(TESTER3_DEFINITIONS_RVVM1 ATR=finz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1 ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM1NOFMA ATR=cinz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1nofma ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM2 ATR=finz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2 ENABLE_RVVM2)
+set(TESTER3_DEFINITIONS_RVVM2NOFMA ATR=cinz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2nofma ENABLE_RVVM2)
+
 set(TESTER3_DEFINITIONS_PUREC_SCALAR ATR=cinz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purec)
 set(TESTER3_DEFINITIONS_PURECFMA_SCALAR ATR=finz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purecfma)
 
@@ -47,6 +52,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(TEST3_CINZ purec_scalar vxenofma vxe2nofma)
   set(TEST3_FINZ purecfma_scalar vxe vxe2)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+  set(TEST3_CINZ purec_scalar rvvm1nofma rvvm2nofma)
+  set(TEST3_FINZ purecfma_scalar rvvm1 rvvm2)
 endif()
 
 #
diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c
index 002cb0f1..03fcd743 100644
--- a/src/libm-tester/iutsimd.c
+++ b/src/libm-tester/iutsimd.c
@@ -343,6 +343,30 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
 typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #endif
 
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#include "helperrvv.h"
+#include "renamervvm1.h"
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#include "helperrvv.h"
+#include "renamervvm2.h"
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #include "renamepurec_scalar.h"
 #if !defined(USE_INLINE_HEADER)
@@ -426,12 +450,12 @@ int check_feature(double d, float f) {
   return 0;
 }
 
-#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER))
+#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER))
 static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
 static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
 #endif
 
-#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER))
+#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER))
 static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; }
 static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c
index 540d1142..5071bb70 100644
--- a/src/libm-tester/tester2simddp.c
+++ b/src/libm-tester/tester2simddp.c
@@ -191,6 +191,38 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
 typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #endif
 
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm1.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm2.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -209,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
 static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c
index d140ba4b..3fb1e619 100644
--- a/src/libm-tester/tester2simdsp.c
+++ b/src/libm-tester/tester2simdsp.c
@@ -191,6 +191,38 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
 typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #endif
 
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm1.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm2.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -209,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; }
 static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester3.c b/src/libm-tester/tester3.c
index f4b27e56..a55404ed 100644
--- a/src/libm-tester/tester3.c
+++ b/src/libm-tester/tester3.c
@@ -101,6 +101,44 @@ static INLINE __attribute__((vector_size(16))) float setSLEEF_VECTOR_FLOAT(float
 static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float v, int r) { return unifyValuef(v[r & 3]); }
 #endif
 
+#if __riscv && __riscv_v
+
+#if defined(ENABLE_RVVM1)
+#define VECTLENSP (1 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (1 * __riscv_vlenb() / sizeof(double))
+
+static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
+static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m1_t setvfloat64m1_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m1(a, VECTLENDP); }
+static INLINE double getvfloat64m1_t(vfloat64m1_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m1_t vf2getx_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 0); }
+static vfloat32m1_t vf2gety_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 1); }
+static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 0); }
+static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
+
+#elif defined(ENABLE_RVVM2)
+#define VECTLENSP (2 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (2 * __riscv_vlenb() / sizeof(double))
+
+static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
+static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m2_t setvfloat64m2_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m2(a, VECTLENDP); }
+static INLINE double getvfloat64m2_t(vfloat64m2_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m2_t vf2getx_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 0); }
+static vfloat32m2_t vf2gety_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 1); }
+static vfloat64m2_t vd2getx_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 0); }
+static vfloat64m2_t vd2gety_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 1); }
+
+#else
+#error "unknown RVV"
+#endif
+
+#undef VECTLENSP
+#undef VECTLENDP
+#endif
+
 //
 // ATR = cinz_, NAME = sin, TYPE = d2, ULP = u35, EXT = sse2
@@ -110,7 +148,7 @@ static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float
 #define SET(TYPE) set ## TYPE
 #define GET(TYPE) get ## TYPE
 
-#ifndef __ARM_FEATURE_SVE
+#if !defined(__ARM_FEATURE_SVE) && !(defined(__riscv) && defined(__riscv_v))
 static DPTYPE vd2getx_vd_vd2(TYPE2(DPTYPE) v) { return v.x; }
 static DPTYPE vd2gety_vd_vd2(TYPE2(DPTYPE) v) { return v.y; }
 static SPTYPE vf2getx_vf_vf2(TYPE2(SPTYPE) v) { return v.x; }
diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt
index fe0a5d39..a0e4029d 100644
--- a/src/libm/CMakeLists.txt
+++ b/src/libm/CMakeLists.txt
@@ -60,6 +60,15 @@ elseif(SLEEF_ARCH_S390X)
     PURECFMA_SCALAR
     DSP_SCALAR
   )
+elseif(SLEEF_ARCH_RISCV64)
+  set(SLEEF_HEADER_LIST
+    RVVM1
+    RVVM1NOFMA
+    RVVM2
+    RVVM2NOFMA
+    PUREC_SCALAR
+    PURECFMA_SCALAR
+  )
 endif()
 
 # HEADER_PARAMS
@@ -98,6 +107,11 @@ command_arguments(HEADER_PARAMS_VXENOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE"
 command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2)
 command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma)
 
+command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1)
+command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1nofma)
+command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2)
+command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2nofma)
+
 command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__)
 command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec)
 command_arguments(HEADER_PARAMS_PURECFMA_SCALAR finz_ 1 1 double float int32_t int32_t __STDC__ purecfma)
@@ -144,6 +158,11 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3
 # the "x" token of VLA SVE vector functions.
 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)
 
+command_arguments(RENAME_PARAMS_RVVM1 finz_ x x rvvm1)
+command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x rvvm1nofma)
+command_arguments(RENAME_PARAMS_RVVM2 finz_ x x rvvm2)
+command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x rvvm2nofma)
+
 # ALIAS_PARAMS
 
 command_arguments(ALIAS_PARAMS_AVX512F_DP 8 __m512d __m256i e avx512f)
@@ -476,12 +495,17 @@ if(BUILD_INLINE_HEADERS)
     if(COMPILER_SUPPORTS_${SIMD})
       string(TOLOWER ${SIMD} SIMDLC)
 
+      if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET)
+        set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET})
+      endif()
+
       set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h)
 
       add_custom_command(
         OUTPUT ${INLINE_HEADER_FILE}
 
         # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved
         COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}   # gcc -E -C
+          ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}}                                     # -msse2
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch   # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                         # -I/build/src/libm/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME   # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
@@ -497,6 +521,7 @@ if(BUILD_INLINE_HEADERS)
 
         # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. Include macroonly*.h instead of helper*.h.
         COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}   # gcc -E -C
+          ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}}                                     # -msse2
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch   # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                         # -I/build/src/libm/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME   # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
diff --git a/src/libm/sleeflibm_header.h.org.in b/src/libm/sleeflibm_header.h.org.in
index d637b60e..89b3a1ca 100644
--- a/src/libm/sleeflibm_header.h.org.in
+++ b/src/libm/sleeflibm_header.h.org.in
@@ -131,6 +131,18 @@ SLEEF_IMPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx);
 
 //
 
+#if defined(__riscv_v)
+#include <riscv_vector.h>
+typedef vfloat64m2_t Sleef_vfloat64m1_t_2;
+typedef vfloat32m2_t Sleef_vfloat32m1_t_2;
+typedef vfloat64m4_t Sleef_vfloat64m2_t_2;
+typedef vfloat32m4_t Sleef_vfloat32m2_t_2;
+#define Sleef_vfloat64m1_t_2_DEFINED
+#define Sleef_vfloat32m1_t_2_DEFINED
+#define Sleef_vfloat64m2_t_2_DEFINED
+#define Sleef_vfloat32m2_t_2_DEFINED
+#endif
+
 #ifndef Sleef_double2_DEFINED
 #define Sleef_double2_DEFINED
 typedef struct {
diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c
index e531495f..6cdaa6ad 100644
--- a/src/libm/sleefsimddp.c
+++ b/src/libm/sleefsimddp.c
@@ -221,6 +221,59 @@ extern const double Sleef_rempitabdp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1nofma.h"
+#endif
+#endif /* ENABLE_RVVM1NOFMA */
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2nofma.h"
+#endif
+#endif /* ENABLE_RVVM2NOFMA */
+
 // Generic
 
 #ifdef ENABLE_VECEXT
diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c
index 9e1faa23..c5dbc2a5 100644
--- a/src/libm/sleefsimdsp.c
+++ b/src/libm/sleefsimdsp.c
@@ -321,6 +321,59 @@ extern const float Sleef_rempitabsp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1nofma.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2nofma.h"
+#endif
+#endif
+
 // Generic
 
 #ifdef ENABLE_VECEXT
@@ -401,6 +454,7 @@ static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) {
   return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f)));
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) {
   return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
 }
@@ -413,6 +467,7 @@ static INLINE CONST VECTOR_CC vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) {
 static INLINE CONST VECTOR_CC vfloat vsign_vf_vf(vfloat f) {
   return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vf(vfloat d) {
   return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000));
@@ -487,7 +542,7 @@ static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) {
 
 EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); }
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 typedef struct {
   vfloat d;
   vint2 i;
@@ -517,9 +572,11 @@ static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) {
 }
 #endif
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST VECTOR_CC vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) {
   return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
 }
+#endif
 
 static INLINE CONST fi_t rempisubf(vfloat x) {
 #ifdef FULL_FP_ROUNDING
@@ -3290,7 +3347,7 @@ EXPORT CONST VECTOR_CC vfloat xcospif_u05(vfloat d) {
 }
 #endif // #if !defined(DETERMINISTIC)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 typedef struct {
   vfloat2 a, b;
 } df2;
diff --git a/src/quad/CMakeLists.txt b/src/quad/CMakeLists.txt
index f37de951..5eae42da 100644
--- a/src/quad/CMakeLists.txt
+++ b/src/quad/CMakeLists.txt
@@ -234,12 +234,17 @@ if(BUILD_INLINE_HEADERS)
     if(COMPILER_SUPPORTS_${SIMD})
       string(TOLOWER ${SIMD} SIMDLC)
 
+      if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET)
+        set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET})
+      endif()
+
       set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefquadinline_${SIMDLC}.h)
 
       add_custom_command(
         OUTPUT ${INLINE_HEADER_FILE}
 
         # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved
         COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}   # gcc -E -C
+          ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}}                                     # -msse2
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch   # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                         # -I/build/src/quad/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME   # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh
index 56c4c88c..f590a9db 100644
--- a/travis/before_script.aarch64-gcc.sh
+++ b/travis/before_script.aarch64-gcc.sh
@@ -8,5 +8,5 @@ ninja all
 cd /build
 mkdir build-cross
 cd build-cross
-cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE ..
+cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE ..
diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh
index 8c4bd4fa..464fa581 100644
--- a/travis/before_script.armhf-gcc.sh
+++ b/travis/before_script.armhf-gcc.sh
@@ -8,4 +8,4 @@ ninja all
 cd /build
 mkdir build-cross
 cd build-cross
-cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE ..
+cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE ..
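A note on the pair convention introduced above: the sleeflibm_header.h.org.in hunk maps each `Sleef_<type>_2` result type to a register group of twice the LMUL (`Sleef_vfloat64m1_t_2` is `vfloat64m2_t`), and the tester3.c hunk unpacks those groups with `__riscv_vget_f64m1`/`__riscv_vget_f32m1`. The following standalone sketch illustrates that convention; it is not part of the patch, the file name and build line are assumptions, and it presumes a clang-17-style RVV intrinsics implementation (`__riscv_vundefined_f64m2` and the overloaded `__riscv_vset`/`__riscv_vget`):

    /* pair_layout.c -- illustrative sketch, not from the patch.
     * Assumed build: clang-17 --target=riscv64-linux-gnu -march=rv64gcv pair_layout.c */
    #include <riscv_vector.h>
    #include <stdio.h>

    int main(void) {
      double xs[4] = {1.0, 2.0, 3.0, 4.0}, ys[4] = {10.0, 20.0, 30.0, 40.0};
      size_t vl = __riscv_vsetvl_e64m1(4);          // lanes per f64m1 register

      vfloat64m1_t x = __riscv_vle64_v_f64m1(xs, vl);
      vfloat64m1_t y = __riscv_vle64_v_f64m1(ys, vl);

      // Pack the pair into one m2 register group, the same shape the patch
      // gives Sleef_vfloat64m1_t_2 (vundefined replaces the uninitialized
      // temporary that vqsetxy_vq_vm_vm in helperrvv.h relies on).
      vfloat64m2_t pair = __riscv_vundefined_f64m2();
      pair = __riscv_vset(pair, 0, x);
      pair = __riscv_vset(pair, 1, y);

      // Unpack exactly as tester3.c's vd2getx_vd_vd2/vd2gety_vd_vd2 do.
      double out[4];
      __riscv_vse64(out, __riscv_vget_f64m1(pair, 1), vl);
      printf("y[0] recovered from the pair: %g\n", out[0]);  // prints 10
      return 0;
    }

Because the pair is just a wider register group rather than a struct, a VLA (vector-length-agnostic) ABI needs no memory round-trip to return two vectors, which is presumably why the same layout is used for SVE.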
diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake
new file mode 100644
index 00000000..c3594551
--- /dev/null
+++ b/travis/toolchain-aarch64-gcc.cmake
@@ -0,0 +1,11 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "aarch64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-aarch64.cmake b/travis/toolchain-aarch64-llvm.cmake
similarity index 71%
rename from travis/toolchain-aarch64.cmake
rename to travis/toolchain-aarch64-llvm.cmake
index c73de216..d9c11ae0 100644
--- a/travis/toolchain-aarch64.cmake
+++ b/travis/toolchain-aarch64-llvm.cmake
@@ -4,7 +4,8 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu)
 
-find_program(CMAKE_C_COMPILER aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5)
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET aarch64-linux-gnu)
 
 SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake
new file mode 100644
index 00000000..24e160b9
--- /dev/null
+++ b/travis/toolchain-armhf-gcc.cmake
@@ -0,0 +1,11 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "armhf")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf)
+
+find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-armhf.cmake b/travis/toolchain-armhf-llvm.cmake
similarity index 69%
rename from travis/toolchain-armhf.cmake
rename to travis/toolchain-armhf-llvm.cmake
index ba233487..6c157289 100644
--- a/travis/toolchain-armhf.cmake
+++ b/travis/toolchain-armhf-llvm.cmake
@@ -4,7 +4,8 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf)
 
-find_program(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5)
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf)
 
 SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
diff --git a/travis/toolchain-native-gcc.cmake b/travis/toolchain-native-gcc.cmake
new file mode 100644
index 00000000..07ea294d
--- /dev/null
+++ b/travis/toolchain-native-gcc.cmake
@@ -0,0 +1 @@
+find_program(CMAKE_C_COMPILER gcc)
diff --git a/travis/toolchain-native-llvm.cmake b/travis/toolchain-native-llvm.cmake
new file mode 100644
index 00000000..6f8e7121
--- /dev/null
+++ b/travis/toolchain-native-llvm.cmake
@@ -0,0 +1 @@
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
diff --git a/travis/toolchain-ppc64el.cmake b/travis/toolchain-ppc64el-gcc.cmake
similarity index 80%
rename from travis/toolchain-ppc64el.cmake
rename to travis/toolchain-ppc64el-gcc.cmake
index e26a6eaa..7d6c96ae 100644
--- a/travis/toolchain-ppc64el.cmake
+++ b/travis/toolchain-ppc64el-gcc.cmake
@@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu)
 
-find_program(CMAKE_C_COMPILER powerpc64le-linux-gnu-gcc ppc64el-cc)
+find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc)
 
 SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar)
 
diff --git a/travis/toolchain-ppc64el-llvm.cmake b/travis/toolchain-ppc64el-llvm.cmake
new file mode 100644
index 00000000..531b36f3
--- /dev/null
+++ b/travis/toolchain-ppc64el-llvm.cmake
@@ -0,0 +1,14 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "ppc64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu)
+
+SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-riscv64-gcc.cmake b/travis/toolchain-riscv64-gcc.cmake
new file mode 100644
index 00000000..b0840998
--- /dev/null
+++ b/travis/toolchain-riscv64-gcc.cmake
@@ -0,0 +1,11 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "riscv64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES riscv64-linux-gnu-gcc-14 riscv64-linux-gnu-gcc)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-riscv64-llvm.cmake b/travis/toolchain-riscv64-llvm.cmake
new file mode 100644
index 00000000..1821770a
--- /dev/null
+++ b/travis/toolchain-riscv64-llvm.cmake
@@ -0,0 +1,12 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "riscv64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang)
+set(CMAKE_C_COMPILER_TARGET riscv64-linux-gnu)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-s390x.cmake b/travis/toolchain-s390x-gcc.cmake
similarity index 81%
rename from travis/toolchain-s390x.cmake
rename to travis/toolchain-s390x-gcc.cmake
index a2d37bda..4aa9f12c 100644
--- a/travis/toolchain-s390x.cmake
+++ b/travis/toolchain-s390x-gcc.cmake
@@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu)
 
-find_program(CMAKE_C_COMPILER s390x-linux-gnu-gcc)
+find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-11 s390x-linux-gnu-gcc)
 
 SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
diff --git a/travis/toolchain-s390x-llvm.cmake b/travis/toolchain-s390x-llvm.cmake
new file mode 100644
index 00000000..ca5e9687
--- /dev/null
+++ b/travis/toolchain-s390x-llvm.cmake
@@ -0,0 +1,12 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "s390x")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET s390x-linux-gnu)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
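Stepping back from the toolchain plumbing, the subtlest part of this patch is the mask emulation in helperrvv.h: SLEEF's bitmask (`vmask`) and predicate (`vopmask`) helpers are built from `__riscv_vmerge`, because RVV keeps predicates in dedicated mask registers rather than as all-ones/all-zeros vector lanes. A scalar model of a few of those helpers, illustration only (plain portable C, not code from the patch; one `uint64_t` stands for one `vmask` lane, one `bool` for one `vopmask` lane):

    /* mask_model.c -- scalar model of a few helperrvv.h helpers. */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    // vcast_vm_i_i: pack two 32-bit halves into one 64-bit mask element.
    static uint64_t cast_vm_i_i(int64_t h, int64_t l) {
      return (((uint64_t)h) << 32) | (uint32_t)l;
    }

    // vand_vm_vo64_vm: __riscv_vmerge(y, 0, vmnot(x)) keeps y only where the
    // predicate is set -- AND against an all-ones/all-zeros lane mask.
    static uint64_t and_vo64_vm(bool x, uint64_t y) { return x ? y : 0; }

    // vandnot_vm_vo64_vm: __riscv_vmerge(y, 0, x) zeroes the selected lanes,
    // i.e. (~mask) & y per lane.
    static uint64_t andnot_vo64_vm(bool x, uint64_t y) { return x ? 0 : y; }

    // vor_vm_vo64_vm: __riscv_vmerge(y, -1, x) forces all bits on where set.
    static uint64_t or_vo64_vm(bool x, uint64_t y) { return x ? ~UINT64_C(0) : y; }

    int main(void) {
      assert(cast_vm_i_i(-1, 0) == 0xffffffff00000000u);
      assert(and_vo64_vm(true, 0x1234) == 0x1234 && and_vo64_vm(false, 0x1234) == 0);
      assert(andnot_vo64_vm(true, 0x1234) == 0 && andnot_vo64_vm(false, 0x1234) == 0x1234);
      assert(or_vo64_vm(true, 0) == ~UINT64_C(0) && or_vo64_vm(false, 7) == 7);
      return 0;
    }

Per lane, a set predicate behaves exactly like an all-ones 64-bit mask, which is why merging against 0 or -1 reproduces the AND/ANDNOT/OR semantics the rest of SLEEF expects; `vtestallones_i_vo64` follows the same idea by comparing `__riscv_vcpop` against the lane count.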