From d747f611af755558d5d073d39185224d4b180fea Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Wed, 19 Oct 2022 18:27:04 +0200 Subject: [PATCH 01/24] Extend FMA support to RISC-V --- src/arch/helperpurec_scalar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index d8b9c845..fb83b84c 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -54,7 +54,7 @@ #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP -#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3 +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || defined(__riscv) || CONFIG == 3 #ifndef FP_FAST_FMA //@#ifndef FP_FAST_FMA #define FP_FAST_FMA From a7f1d93d4d598c707563f87250c3677d217270bf Mon Sep 17 00:00:00 2001 From: Eric Love Date: Fri, 13 Jan 2023 13:01:50 -0800 Subject: [PATCH 02/24] Add support for the RISC-V Vector ISA --- CMakeLists.txt | 2 + Configure.cmake | 7 + src/arch/helperpurec_scalar.h | 2 +- src/arch/helperrvv.h | 1016 ++++++++++++++++++++++++++++ src/common/commonfuncs.h | 6 +- src/common/dd.h | 2 +- src/common/df.h | 2 +- src/libm-tester/iutsimd.c | 16 +- src/libm-tester/tester2simddp.c | 18 +- src/libm-tester/tester2simdsp.c | 18 +- src/libm/CMakeLists.txt | 13 + src/libm/sleeflibm_header.h.org.in | 12 + src/libm/sleefsimddp.c | 27 + src/libm/sleefsimdsp.c | 35 +- travis/toolchain-riscv64.cmake | 9 + 15 files changed, 1175 insertions(+), 10 deletions(-) create mode 100644 src/arch/helperrvv.h create mode 100644 travis/toolchain-riscv64.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index ec9e04e3..40dca676 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,7 @@ set(SLEEF_ALL_SUPPORTED_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1 RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -56,6 +57,7 @@ set(SLEEF_SUPPORTED_LIBM_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1 RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." 
)

diff --git a/Configure.cmake b/Configure.cmake
index 63d2c638..25392d09 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -119,6 +119,10 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.")
   set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+  set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.")
+  set(COMPILER_SUPPORTS_RVVM1 1)
+  set(COMPILER_SUPPORTS_RVVM2 1)
 endif()
 
 set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
@@ -163,6 +167,9 @@ set(CLANG_FLAGS_ENABLE_VXE "-march=z14;-mzvector")
 set(CLANG_FLAGS_ENABLE_VXENOFMA "-march=z14;-mzvector")
 set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector")
 set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector")
+# RISC-V
+set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv")
+set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv")
 
 set(FLAGS_OTHERS "")

diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h
index d8b9c845..fb83b84c 100644
--- a/src/arch/helperpurec_scalar.h
+++ b/src/arch/helperpurec_scalar.h
@@ -54,7 +54,7 @@
 #define ENABLE_FMA_SP
 //@#define ENABLE_FMA_SP
 
-#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3
+#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || defined(__riscv) || CONFIG == 3
 #ifndef FP_FAST_FMA
 //@#ifndef FP_FAST_FMA
 #define FP_FAST_FMA

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
new file mode 100644
index 00000000..fafac723
--- /dev/null
+++ b/src/arch/helperrvv.h
@@ -0,0 +1,1016 @@
+#ifndef HELPERRVV_H
+#define HELPERRVV_H
+
+#if !defined(SLEEF_GENHEADER)
+#include <riscv_vector.h>
+#include "misc.h"
+
+#if defined(VECTLENDP) || defined(VECTLENSP)
+#error VECTLENDP or VECTLENSP already defined
+#endif
+#endif // #if !defined(SLEEF_GENHEADER)
+
+#if CONFIG == 1
+#define ISANAME "RISC-V Vector Extension with Min. VLEN"
+#define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#else
+// Expand CONFIG before stringizing; a bare # does not stringize in an
+// object-like macro, so a two-step helper is needed here.
+#define SLEEF_RVV_STR_(x) #x
+#define SLEEF_RVV_STR(x) SLEEF_RVV_STR_(x)
+#define ISANAME "RISC-V Vector Extension VLEN=2^" SLEEF_RVV_STR(CONFIG)
+#define SLEEF_RVV_VLEN (1 << CONFIG)
+#endif
+
+#ifndef CONFIG
+#error CONFIG macro not defined
+#endif
+
+#define ENABLE_SP
+#define ENABLE_FMA_DP
+#define ENABLE_DP
+
+static INLINE int vavailability_i(int name) { return -1; }
+
+////////////////////////////////////////////////////////////////////////////////
+// RISC-V Vector Types
+////////////////////////////////////////////////////////////////////////////////
+
+// About the RISC-V Vector type translations:
+//
+// Because the single- and double-precision versions of the RVV port have
+// conflicting definitions of the vmask and vopmask types, they can only
+// be defined for at most one precision level in a single translation unit.
+// Any functions that use vmask or vopmask types are thus enabled only by the
+// corresponding ENABLE_RVV_SP or ENABLE_RVV_DP macro guards.
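//
// [Illustration only; an editor's sketch, not part of the original patch.]
// A consumer translation unit picks one LMUL variant and at most one mask
// precision before including this header, which is what the testers added
// later in this series do:
//
//     #define CONFIG 1
//     #define ENABLE_RVVM1    // or ENABLE_RVVM2 for the LMUL=2 variant
//     #define ENABLE_RVV_DP   // never together with ENABLE_RVV_SP
//     #include "helperrvv.h"
//
// With these settings vmask becomes vuint64m1_t and vopmask vbool64_t, and
// the conflicting single-precision definitions below are compiled out.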
+#if defined(ENABLE_RVV_SP) && defined(ENABLE_RVV_DP)
+#error Cannot simultaneously define ENABLE_RVV_SP and ENABLE_RVV_DP
+#endif
+
+#ifdef ENABLE_RVV_SP
+// Types that conflict with ENABLE_RVV_DP
+#ifdef ENABLE_RVVM1
+typedef vuint64m2_t vmask;
+typedef vbool32_t vopmask;
+#else
+typedef vuint64m4_t vmask;
+typedef vbool16_t vopmask;
+#endif
+#endif
+
+#ifdef ENABLE_RVV_DP
+// Types that conflict with ENABLE_RVV_SP
+#ifdef ENABLE_RVVM1
+typedef vuint64m1_t vmask;
+typedef vbool64_t vopmask;
+#else
+typedef vuint64m2_t vmask;
+typedef vbool32_t vopmask;
+#endif
+#endif
+
+// LMUL-Dependent Type & Macro Definitions:
+//
+// Some SLEEF types are multi-value structs. RVV vectors have unknown length at
+// compile time, so they cannot appear in a struct in Clang. They are instead
+// represented as single vectors with "members" packed into the registers of a
+// wide-LMUL register group. In the largest cases (ddi_t and dfi_t), this
+// requires LMUL=8 if the base type (vfloat or vdouble) has LMUL=2, meaning
+// LMUL=2 is currently the widest option for SLEEF function argument types.
+#ifdef ENABLE_RVVM1
+
+typedef vint32mf2_t vint;
+typedef vfloat64m1_t vdouble;
+typedef vfloat64m2_t vdouble2;
+typedef vfloat64m4_t vdouble3;
+typedef vfloat64m4_t dd2;
+typedef vuint64m2_t vquad;
+typedef vint32m2_t di_t;
+typedef vint32m4_t ddi_t;
+typedef vfloat32m1_t vfloat;
+typedef vfloat32m2_t vfloat2;
+typedef vfloat32m4_t df2;
+typedef vint32m1_t vint2;
+typedef vint32m2_t fi_t;
+typedef vint32m4_t dfi_t;
+#define SLEEF_RVV_SP_LMUL 1
+#define SLEEF_RVV_DP_LMUL 1
+#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32)
+#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64)
+#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m1
+#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m1
+#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m1
+#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m1
+#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m2
+#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m1
+#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m2
+#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m4
+#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1
+#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m1
+#define SLEEF_RVV_SP_VGET_VI2 vget_i32m1
+#define SLEEF_RVV_SP_VGET_2VI vget_i32m2
+#define SLEEF_RVV_SP_VGET_VF vget_f32m1
+#define SLEEF_RVV_SP_VGET_VF2 vget_f32m2
+#define SLEEF_RVV_SP_VGET_4VF vget_f32m4
+#define SLEEF_RVV_SP_VGET_VU2 vget_u32m2
+#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m1
+#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m1
+#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1
+#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m1
+#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m1
+#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP)
+#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32mf2
+#define SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m1
+#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m1
+#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m2
+#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \
+  vreinterpret_v_i64m2_i32m2(vreinterpret_i64m2(x))
+#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \
+  vreinterpret_f64m2(vreinterpret_v_i32m2_i64m2(x))
+#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m4
+#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \
vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM vget_u64m1 +#define SLEEF_RVV_DP_VGET_VD vget_f64m1 +#define SLEEF_RVV_DP_VGET_VD2 vget_f64m2 +#define SLEEF_RVV_DP_VGET_4VD vget_f64m2 +#define SLEEF_RVV_DP_VGET_VI vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI vget_i32m1 +#define SLEEF_RVV_DP_VGET_4VI vget_i32m2 +#define SLEEF_RVV_DP_VGET_8VI vget_i32m4 +#define SLEEF_RVV_DP_VGET_VU vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m1 +#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32mf2 + +#else + +typedef vint32m1_t vint; +typedef vfloat64m2_t vdouble; +typedef vfloat64m4_t vdouble2; +typedef vfloat64m8_t vdouble3; +typedef vfloat64m8_t dd2; +typedef vuint64m4_t vquad; +typedef vint32m4_t di_t; +typedef vint32m8_t ddi_t; +typedef vfloat32m2_t vfloat; +typedef vfloat32m4_t vfloat2; +typedef vfloat32m8_t df2; +typedef vint32m2_t vint2; +typedef vint32m4_t fi_t; +typedef vint32m8_t dfi_t; +#define SLEEF_RVV_SP_LMUL 2 +#define SLEEF_RVV_DP_LMUL 2 +#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32) +#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64) +#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m2 +#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m2 +#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m8 +#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m2 +#define SLEEF_RVV_SP_VGET_VI2 vget_i32m2 +#define SLEEF_RVV_SP_VGET_2VI vget_i32m4 +#define SLEEF_RVV_SP_VGET_VF vget_f32m2 +#define SLEEF_RVV_SP_VGET_VF2 vget_f32m4 +#define SLEEF_RVV_SP_VGET_4VF vget_f32m8 +#define SLEEF_RVV_SP_VGET_VU2 vget_u32m4 +#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m2 +#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m2 +#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 +#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m2 +#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m2 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32m1 +#define SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m4 +#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ + vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) 
\ + vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m8 +#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ + vreinterpret_f64m8(vreinterpret_v_i32m8_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + vreinterpret_v_i64m8_i32m8(vreinterpret_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM vget_u64m2 +#define SLEEF_RVV_DP_VGET_VD vget_f64m2 +#define SLEEF_RVV_DP_VGET_VD2 vget_f64m4 +#define SLEEF_RVV_DP_VGET_4VD vget_f64m4 +#define SLEEF_RVV_DP_VGET_VI vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI vget_i32m2 +#define SLEEF_RVV_DP_VGET_4VI vget_i32m4 +#define SLEEF_RVV_DP_VGET_8VI vget_i32m8 +#define SLEEF_RVV_DP_VGET_VU vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m2 +#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32m1 + +#endif // ENABLE_RVVM1 + +//////////////////////////////////////////////////////////////////////////////// +// Single-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// fi type +static INLINE vfloat figetd_vf_di(fi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF(SLEEF_RVV_SP_VGET_VI2(d, 0)); +} +static INLINE vint2 figeti_vi2_di(fi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 1); +} +static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { + fi_t res; + res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); + res = vset(res, 1, i); + return res; +} +static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF2(SLEEF_RVV_SP_VGET_2VI(d, 0)); +} +static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 2); +} +static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { + dfi_t res; + res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + res = vset(res, 2, i); + return res; +} +static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { + return vset(dfi, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); +} +// vfloat2 type +static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 0); +} +static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 1); +} +static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { + vfloat2 res; + res = vset(res, 0, x); + res = vset(res, 1, y); + return res; +} +static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { + return vset(v, 0, d); +} +static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { + return vset(v, 1, d); +} +// df2 type +static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { + df2 res; + res = vset(res, 0, a); + res = vset(res, 1, b); + return res; +} +static vfloat2 df2geta_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 0); } +static vfloat2 df2getb_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 1); } +static INLINE 
vint2 vreinterpret_vi2_vf(vfloat vf) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(vf); +} +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { + return SLEEF_RVV_SP_VREINTERPRET_VF(vi); +} + + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vfloat vcast_vf_f(float f) { + return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP); +} +static INLINE vfloat vrint_vf_vf(vfloat vd) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); + half = vfsgnj(half, vd, VECTLENSP); + vfloat res = vfadd(vd, half, VECTLENSP); + vint2 i = vfcvt_rtz_x(res, VECTLENSP); + return vfcvt_f(i, VECTLENSP); +} +static INLINE vfloat vcast_vf_vi2(vint2 vi) { + return vfcvt_f(vi, VECTLENSP); +} +static INLINE vint2 vcast_vi2_i(int i) { + return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP); +} +static INLINE vint2 vrint_vi2_vf(vfloat vf) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); + half = vfsgnj(half, vf, VECTLENSP); + vfloat res = vfadd(vf, half, VECTLENSP); + return vfcvt_rtz_x(res, VECTLENSP); +} +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { + return vfcvt_rtz_x(vf, VECTLENSP); +} +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { + return vcast_vf_vi2(vtruncate_vi2_vf(vf)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vfloat vload_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE vfloat vloadu_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { + vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { + vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { + vse32(ptr, v, VECTLENSP); +} +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + return vluxei32(ptr, vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { + return vfadd(x, y, VECTLENSP); +} +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { + return vfsub(x, y, VECTLENSP); +} +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { + return vfmul(x, y, VECTLENSP); +} +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { + return vfdiv(x, y, VECTLENSP); +} +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { + return vfmax(x, y, VECTLENSP); +} +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { + return vfmin(x, y, VECTLENSP); +} +static INLINE vfloat vrec_vf_vf(vfloat d) { + return vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); +} +static INLINE vfloat vsqrt_vf_vf(vfloat d) { + return vfsqrt(d, VECTLENSP); +} +// fused multiply-add/subtract +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vfmadd(x, y, z, VECTLENSP); +} +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vfnmsub(x, y, z, VECTLENSP); +} +// sign manipulation +static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { + return vfsgnjx(x, y, VECTLENSP); +} +static INLINE vfloat vcopysign_vf_vf_vf(vfloat x, 
vfloat y) { + return vfsgnj(x, y, VECTLENSP); +} +static INLINE vfloat vsign_vf_vf(vfloat f) { + return vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); +} +static INLINE vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { + vint2 xi = SLEEF_RVV_SP_VREINTERPRET_VI2(x); + vint2 yi = SLEEF_RVV_SP_VREINTERPRET_VI2(y); + vint2 xioryi = vor(xi, yi, VECTLENSP); + vfloat xory = SLEEF_RVV_SP_VREINTERPRET_VF(xioryi); + return vfsgnj(x, xory, VECTLENSP); +} +static INLINE vfloat vabs_vf_vf(vfloat f) { + return vfabs(f, VECTLENSP); +} +static INLINE vfloat vneg_vf_vf(vfloat f) { + return vfneg(f, VECTLENSP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + return vadd(x, y, VECTLENSP); +} +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + return vsub(x, y, VECTLENSP); +} +static INLINE vint2 vneg_vi2_vi2(vint2 x) { + return vneg(x, VECTLENSP); +} +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + return vand(x, y, VECTLENSP); +} +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + return vand(vnot(x, VECTLENSP), y, VECTLENSP); +} +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + return vor(x, y, VECTLENSP); +} +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + return vxor(x, y, VECTLENSP); +} +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { + return vsll(x, c, VECTLENSP); +} +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { + return vsra(x, c, VECTLENSP); +} +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); +} + +#ifdef ENABLE_RVV_SP +/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { + return SLEEF_RVV_SP_VREINTERPRET_VF(vncvt_x(vm, VECTLENSP)); +} +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { + return vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); +} +static INLINE int vtestallones_i_vo32(vopmask g) { + return vcpop(g, VECTLENSP) == VECTLENSP; +} +static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { + return SLEEF_RVV_SP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENSP); +} +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return vand(x, y, VECTLENSP); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return vor(x, y, VECTLENSP); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return vxor(x, y, VECTLENSP); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return vand(SLEEF_RVV_SP_VREINTERPRET_VM(vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); +} +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { + return vmerge(x, y, -1, VECTLENSP); +} +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { + return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); +} +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { + return vmerge(x, y, 0, VECTLENSP); +} + + +/****************************************/ +/* Logical Mask Operations */ +/****************************************/ +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return vmand(x, y, VECTLENSP); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return vmandn(y, x, VECTLENSP); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return vmor(x, y, VECTLENSP); +} +static INLINE vopmask 
vxor_vo_vo_vo(vopmask x, vopmask y) { + return vmxor(x, y, VECTLENSP); +} +// single precision FP comparison +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { + return vmfeq(x, y, VECTLENSP); +} +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { + return vmfne(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { + return vmfgt(x, y, VECTLENSP); +} +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { + return vmfge(x, y, VECTLENSP); +} +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { + return vmflt(x, y, VECTLENSP); +} +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { + return vmfle(x, y, VECTLENSP); +} +static INLINE vopmask visnan_vo_vf(vfloat d) { + return vmfne(d, d, VECTLENSP); +} +static INLINE vopmask visinf_vo_vf(vfloat d) { + return vmfeq(vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); +} +static INLINE vopmask vispinf_vo_vf(vfloat d) { + return vmfeq(d, SLEEF_INFINITYf, VECTLENSP); +} +// conditional select +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return vmerge(mask, y, x, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_f_f(vopmask mask, float v1, float v0) { + return vfmerge(mask, vcast_vf_f(v0), v1, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vfmerge(o0, vfmerge(o1, vcast_vf_f(d2), d1, VECTLENSP), d0, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vf_f(d3), d2, VECTLENSP), d1, VECTLENSP), d0, VECTLENSP); +} +// integer comparison +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + return vmseq(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + return vmsgt(x, y, VECTLENSP); +} +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 zero = vcast_vi2_i(0); + return vmerge(vmsgt(x, y, VECTLENSP), zero, -1, VECTLENSP); +} +// integer conditional select +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return vmerge(m, y, x, VECTLENSP); +} +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { + return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); +} +#endif // ENABLE_RVV_SP + + +//////////////////////////////////////////////////////////////////////////////// +// Double-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// vdouble2 type +static INLINE const vdouble vd2getx_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 0); +} +static INLINE const vdouble vd2gety_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 1); +} +static INLINE const vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { + vdouble2 res; + res = vset(res, 0, x); + res = vset(res, 1, y); + return res; +} +static INLINE const vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { + return vset(v, 0, d); +} +static INLINE const vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { + return vset(v, 1, d); +} +// dd2 type +static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { + dd2 res; + res = vset(res, 0, a); + res = vset(res, 1, b); + return res; +} +static vdouble2 dd2geta_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 0); } +static vdouble2 dd2getb_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 1); } +// vdouble3 type +static 
INLINE vdouble vd3getx_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 0); } +static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 1); } +static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 2); } +static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + vdouble3 res; + res = vset(res, 0, x); + res = vset(res, 1, y); + res = vset(res, 2, z); + return res; +} +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 2, d); } +// di type +static INLINE vdouble digetd_vd_di(di_t d) { + return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); +} +static INLINE vint digeti_vi_di(di_t d) { +#ifdef ENABLE_RVVM1 + return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); +#else + return SLEEF_RVV_DP_VGET_VI(d, 2); +#endif +} +static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { + di_t res; + res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); +#ifdef ENABLE_RVVM1 + res = vset(res, 1, vlmul_ext_i32m1(i)); +#else + res = vset(res, 2, i); +#endif + return res; +} +// ddi type +static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { + return SLEEF_RVV_DP_VGET_VD2(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(d), 0); +} +static INLINE vint ddigeti_vi_ddi(ddi_t d) { +#ifdef ENABLE_RVVM1 + return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); +#else + return SLEEF_RVV_DP_VGET_VI(d, 4); +#endif +} +static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { + ddi_t res; + res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); +#ifdef ENABLE_RVVM1 + res = vset(res, 2, vlmul_ext_i32m1(i)); +#else + res = vset(res, 4, i); +#endif + return res; +} +static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { + return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); +} + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vdouble vcast_vd_d(double d) { + return SLEEF_RVV_DP_VCAST_VD_D(d, VECTLENDP); +} +static INLINE vdouble vcast_vd_vi(vint i) { + return SLEEF_RVV_DP_VCAST_VD_VI(i); +} +static INLINE vint vcast_vi_i(int32_t i) { + return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP); +} +static INLINE vint vrint_vi_vd(vdouble vd) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); + half = vfsgnj(half, vd, VECTLENDP); + vdouble res = vfadd(vd, half, VECTLENDP); + return vfncvt_rtz_x(res, VECTLENDP); +} +static INLINE vdouble vrint_vd_vd(vdouble vd) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); + half = vfsgnj(half, vd, VECTLENDP); + vdouble res = vfadd(vd, half, VECTLENDP); + return vfwcvt_f(vfncvt_rtz_x(res, VECTLENDP), VECTLENDP); +} +static INLINE vint vtruncate_vi_vd(vdouble vd) { + return vfncvt_rtz_x(vd, VECTLENDP); +} +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { + return vcast_vd_vi(vtruncate_vi_vd(vd)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vdouble vload_vd_p(const 
double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vdouble vloadu_vd_p(const double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vint vloadu_vi_p(int32_t *p) { + return SLEEF_RVV_DP_LOAD_VI(p, VECTLENDP); +} +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { + vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { + vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vi(int32_t *ptr, vint v) { + vse32(ptr, v, VECTLENDP); +} +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + return vluxei64(ptr, vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), sizeof(double), VECTLENDP), VECTLENDP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { + return vfadd(x, y, VECTLENDP); +} +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { + return vfsub(x, y, VECTLENDP); +} +static INLINE vdouble vrec_vd_vd(vdouble d) { + return vfdiv(vcast_vd_d(1.0), d, VECTLENDP); +} +static INLINE vdouble vabs_vd_vd(vdouble d) { + return vfabs(d, VECTLENDP); +} +static INLINE vdouble vsqrt_vd_vd(vdouble d) { + return vfsqrt(d, VECTLENDP); +} +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { + return vfmul(x, y, VECTLENDP); +} +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { + return vfdiv(x, y, VECTLENDP); +} +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { + return vfmax(x, y, VECTLENDP); +} +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { + return vfmin(x, y, VECTLENDP); +} +// fused multiply add / sub +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmadd(x, y, z, VECTLENDP); +} +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmsub(x, y, z, VECTLENDP); +} +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmadd(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfnmsub(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmsub(x, y, z, VECTLENDP); +} +// sign manipulation +static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { + return vfsgnjx(x, y, VECTLENDP); +} +static INLINE vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { + return vfsgnj(x, y, VECTLENDP); +} +static INLINE vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { + return vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); +} +static INLINE vdouble vneg_vd_vd(vdouble d) { + return vfneg(d, VECTLENDP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { + return vadd(x, y, VECTLENDP); +} +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { + return vsub(x, y, VECTLENDP); +} +static INLINE vint vneg_vi_vi(vint x) { + return vneg(x, VECTLENDP); +} +static INLINE vint vand_vi_vi_vi(vint x, vint y) { + return vand(x, y, VECTLENDP); +} +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { + return vand(vnot(x, VECTLENDP), y, VECTLENDP); +} +static INLINE vint vor_vi_vi_vi(vint x, vint y) { + return vor(x, y, VECTLENDP); +} +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { + return vxor(x, y, VECTLENDP); +} 
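/* [Illustration only; an editor's sketch, not part of the original patch.]
   A scalar model of the rounding emulation used by vrint_vi_vd / vrint_vd_vd
   above: since the dynamic rounding mode (frm) cannot be changed safely
   around the intrinsics, a 0.5 that carries the sign of the input is added
   before truncating toward zero. For in-range inputs this rounds halfway
   cases away from zero (2.5 -> 3, -2.5 -> -3) rather than to nearest even.
   The helper name below is hypothetical. */
static INLINE int64_t sleef_rvv_rint_model(double x) {
  double half = __builtin_copysign(0.5, x); // mirrors vfsgnj(half, vd, VECTLENDP)
  return (int64_t)(x + half);               // the C cast truncates, like vfncvt_rtz_x
}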
+static INLINE vint vsll_vi_vi_i(vint x, int c) { + return vsll(x, c, VECTLENDP); +} +static INLINE vint vsra_vi_vi_i(vint x, int c) { + return vsra(x, c, VECTLENDP); +} +static INLINE vint vsrl_vi_vi_i(vint x, int c) { + return SLEEF_RVV_DP_VREINTERPRET_VI(vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); +} + + +#ifdef ENABLE_RVV_DP +/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vmask vcast_vm_i64(int64_t c) { + return SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP); +} +static INLINE vmask vcast_vm_u64(uint64_t c) { + return SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP); +} +static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { + return SLEEF_RVV_DP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENDP); +} +static INLINE vmask vcast_vm_vi(vint vi) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)); +} +static INLINE vmask vcastu_vm_vi(vint vi) { + return vsll(SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP); +} +static INLINE vint vcastu_vi_vm(vmask vm) { + return SLEEF_RVV_DP_VREINTERPRET_VI(vnsrl(vm, 32, VECTLENDP)); +} +static INLINE vint vcast_vi_vm(vmask vm) { + return SLEEF_RVV_DP_VREINTERPRET_VI(vncvt_x(vm, VECTLENDP)); +} +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { + return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); +} +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return vand(x, y, VECTLENDP); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return vor(x, y, VECTLENDP); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return vxor(x, y, VECTLENDP); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return vand(SLEEF_RVV_DP_VREINTERPRET_VM(vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP); +} +static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { + return vmerge(x, y, 0, VECTLENDP); +} +static INLINE vmask vsll64_vm_vm_i(vmask mask, int64_t c) { + return vsll(mask, c, VECTLENDP); +} +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP)); +} +static INLINE vmask vsrl64_vm_vm_i(vmask mask, int64_t c) { + return vsrl(mask, c, VECTLENDP); +} +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { + return vadd(x, y, VECTLENDP); +} +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { + return vmerge(x, y, -1, VECTLENDP); +} +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask mask, vmask x, vmask y) { + return vmerge(mask, y, x, VECTLENDP); +} +static INLINE vmask vneg64_vm_vm(vmask mask) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP)); +} +static INLINE vdouble vreinterpret_vd_vm(vmask vm) { + return SLEEF_RVV_DP_VREINTERPRET_VD(vm); +} +static INLINE vmask vreinterpret_vm_vd(vdouble vd) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vd); +} + +// vquad type +static INLINE const vmask vqgetx_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 0); } +static INLINE const vmask vqgety_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 1); } +static INLINE vquad vqsetxy_vq_vm_vm(vmask x, vmask y) { + vquad res; + res = vset(res, 0, x); + res = vset(res, 1, y); + return res; +} +static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return vset(v, 0, x); } +static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return vset(v, 1, y); } + + + +/****************************************/ +/* Logical Mask Operations */ 
+/****************************************/ +static INLINE vopmask vcast_vo64_vo32(vopmask vo) { + return vo; +} +static INLINE vopmask vcast_vo32_vo64(vopmask vo) { + return vo; +} +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return vmand(x, y, VECTLENDP); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return vmandn(y, x, VECTLENDP); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return vmor(x, y, VECTLENDP); +} +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { + return vmxor(x, y, VECTLENDP); +} +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { + return vmseq(x, y, VECTLENDP); +} +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { + return vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP); +} +// double-precision comparison +static INLINE vopmask visinf_vo_vd(vdouble d) { + return vmfeq(vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP); +} +static INLINE vopmask vispinf_vo_vd(vdouble d) { + return vmfeq(d, SLEEF_INFINITY, VECTLENDP); +} +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { + return vmfeq(x, y, VECTLENDP); +} +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { + return vmfne(x, y, VECTLENDP); +} +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { + return vmflt(x, y, VECTLENDP); +} +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { + return vmfle(x, y, VECTLENDP); +} +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { + return vmfgt(x, y, VECTLENDP); +} +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { + return vmfge(x, y, VECTLENDP); +} +static INLINE vopmask visnan_vo_vd(vdouble d) { + return vmfne(d, d, VECTLENDP); +} +// double-precision conditional select +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { + return vmerge(mask, y, x, VECTLENDP); +} +static INLINE vdouble vsel_vd_vo_d_d(vopmask mask, double v0, double v1) { + return vfmerge(mask, vcast_vd_d(v1), v0, VECTLENDP); +} +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vfmerge(o0, vfmerge(o1, vcast_vd_d(d2), d1, VECTLENDP), d0, VECTLENDP); +} +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vd_d(d3), d2, VECTLENDP), d1, VECTLENDP), d0, VECTLENDP); +} +static INLINE int vtestallones_i_vo64(vopmask g) { + return vcpop(g, VECTLENDP) == VECTLENDP; +} +// integer comparison +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { + return vmseq(x, y, VECTLENDP); +} +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { + return vmsgt(x, y, VECTLENDP); +} +static INLINE vint vgt_vi_vi_vi(vint x, vint y) { + vint zero = vcast_vi_i(0); + return vmerge(vmsgt(x, y, VECTLENDP), zero, -1, VECTLENDP); +} +// integer conditional select +static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { + return vmerge(m, y, x, VECTLENDP); +} +static INLINE vint vandnot_vi_vo_vi(vopmask mask, vint vi) { + return vmerge(mask, vi, 0, VECTLENDP); +} +static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { + return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); +} +#endif // ENABLE_RVV_DP + +#endif // HELPERRVV_H diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h index 2f1a0da9..aff782df 100644 --- a/src/common/commonfuncs.h +++ b/src/common/commonfuncs.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) typedef struct { vdouble x, y, z; } vdouble3; @@ -210,14 +210,17 @@ static INLINE CONST VECTOR_CC vdouble vtoward0_vd_vd(vdouble x) { // returns nex return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t); } +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } +#endif static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) { return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); } +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } @@ -226,6 +229,7 @@ static INLINE CONST VECTOR_CC vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)), vand_vm_vm_vm (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y)))); } +#endif static INLINE CONST VECTOR_CC vdouble vtruncate2_vd_vd(vdouble x) { #ifdef FULL_FP_ROUNDING diff --git a/src/common/dd.h b/src/common/dd.h index b1423556..89af2e87 100644 --- a/src/common/dd.h +++ b/src/common/dd.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) #if !defined(ENABLE_CUDA) typedef struct { vdouble x, y; diff --git a/src/common/df.h b/src/common/df.h index 4e3e7949..0883b227 100644 --- a/src/common/df.h +++ b/src/common/df.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) #if !defined(ENABLE_CUDA) typedef struct { vfloat x, y; diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 002cb0f1..90353586 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -343,6 +343,18 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif +#ifdef ENABLE_RVVM1 +#define CONFIG 1 +#include "helperrvv.h" +#include "renamervvm1.h" +#endif + +#ifdef ENABLE_RVVM2 +#define CONFIG 1 +#include "helperrvv.h" +#include "renamervvm2.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #include "renamepurec_scalar.h" #if !defined(USE_INLINE_HEADER) @@ -426,12 +438,12 @@ int check_feature(double d, float f) { return 0; } -#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || defined(USE_INLINE_HEADER)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif -#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || 
defined(USE_INLINE_HEADER)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c index 540d1142..9d723868 100644 --- a/src/libm-tester/tester2simddp.c +++ b/src/libm-tester/tester2simddp.c @@ -191,6 +191,22 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif +#ifdef ENABLE_RVVM1 +#define CONFIG 1 +#define ENABLE_RVV_DP +#include "helperrvv.h" +#include "renamervvm1.h" +#include "sleef.h" +#endif + +#ifdef ENABLE_RVVM2 +#define CONFIG 1 +#define ENABLE_RVV_DP +#include "helperrvv.h" +#include "renamervvm2.h" +#include "sleef.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" @@ -209,7 +225,7 @@ typedef Sleef_float_2 vfloat2; // -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c index d140ba4b..d83e8b4b 100644 --- a/src/libm-tester/tester2simdsp.c +++ b/src/libm-tester/tester2simdsp.c @@ -191,6 +191,22 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif +#ifdef ENABLE_RVVM1 +#define CONFIG 1 +#define ENABLE_RVV_SP +#include "helperrvv.h" +#include "renamervvm1.h" +#include "sleef.h" +#endif + +#ifdef ENABLE_RVVM2 +#define CONFIG 1 +#define ENABLE_RVV_SP +#include "helperrvv.h" +#include "renamervvm2.h" +#include "sleef.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" @@ -209,7 +225,7 @@ typedef Sleef_float_2 vfloat2; // -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index fe0a5d39..352383b1 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -60,6 +60,13 @@ elseif(SLEEF_ARCH_S390X) PURECFMA_SCALAR DSP_SCALAR ) +elseif(SLEEF_ARCH_RISCV64) + set(SLEEF_HEADER_LIST + RVVM1 + RVVM2 + PUREC_SCALAR + PURECFMA_SCALAR + ) endif() # HEADER_PARAMS @@ -98,6 +105,9 @@ command_arguments(HEADER_PARAMS_VXENOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2) command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma) +command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1) +command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2) + command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__) command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec) command_arguments(HEADER_PARAMS_PURECFMA_SCALAR finz_ 1 1 double float int32_t int32_t __STDC__ purecfma) @@ -144,6 +154,9 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3 # the "x" token of VLA SVE vector 
functions.
 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)
 
+command_arguments(RENAME_PARAMS_RVVM1 finz_ x x m1)
+command_arguments(RENAME_PARAMS_RVVM2 finz_ x x m2)
+
 # ALIAS_PARAMS
 
 command_arguments(ALIAS_PARAMS_AVX512F_DP 8 __m512d __m256i e avx512f)

diff --git a/src/libm/sleeflibm_header.h.org.in b/src/libm/sleeflibm_header.h.org.in
index d637b60e..89b3a1ca 100644
--- a/src/libm/sleeflibm_header.h.org.in
+++ b/src/libm/sleeflibm_header.h.org.in
@@ -131,6 +131,18 @@ SLEEF_IMPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx);
 
 //
 
+#if defined(__riscv_v)
+#include <riscv_vector.h>
+typedef vfloat64m2_t Sleef_vfloat64m1_t_2;
+typedef vfloat32m2_t Sleef_vfloat32m1_t_2;
+typedef vfloat64m4_t Sleef_vfloat64m2_t_2;
+typedef vfloat32m4_t Sleef_vfloat32m2_t_2;
+#define Sleef_vfloat64m1_t_2_DEFINED
+#define Sleef_vfloat32m1_t_2_DEFINED
+#define Sleef_vfloat64m2_t_2_DEFINED
+#define Sleef_vfloat32m2_t_2_DEFINED
+#endif
+
 #ifndef Sleef_double2_DEFINED
 #define Sleef_double2_DEFINED
 typedef struct {

diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c
index e531495f..68c13a34 100644
--- a/src/libm/sleefsimddp.c
+++ b/src/libm/sleefsimddp.c
@@ -221,6 +221,33 @@ extern const double Sleef_rempitabdp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
 // Generic
 
 #ifdef ENABLE_VECEXT

diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c
index 9e1faa23..5ec5a082 100644
--- a/src/libm/sleefsimdsp.c
+++ b/src/libm/sleefsimdsp.c
@@ -321,6 +321,33 @@ extern const float Sleef_rempitabsp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
 // Generic
 
 #ifdef ENABLE_VECEXT
@@ -401,6 +428,7 @@ static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) {
   return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f)));
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2))
 static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) {
   return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
 }
@@ -413,6 +441,7 @@ static INLINE CONST VECTOR_CC vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) {
 static INLINE CONST VECTOR_CC vfloat vsign_vf_vf(vfloat f) {
   return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vf(vfloat d) {
   return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000));
@@ -487,7 +516,7 @@ static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) {
 
 EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return
vldexp_vf_vf_vi2(x, q); } -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) typedef struct { vfloat d; vint2 i; @@ -517,9 +546,11 @@ static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { } #endif +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static INLINE CONST VECTOR_CC vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); } +#endif static INLINE CONST fi_t rempisubf(vfloat x) { #ifdef FULL_FP_ROUNDING @@ -3290,7 +3321,7 @@ EXPORT CONST VECTOR_CC vfloat xcospif_u05(vfloat d) { } #endif // #if !defined(DETERMINISTIC) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) typedef struct { vfloat2 a, b; } df2; diff --git a/travis/toolchain-riscv64.cmake b/travis/toolchain-riscv64.cmake new file mode 100644 index 00000000..bb7b4977 --- /dev/null +++ b/travis/toolchain-riscv64.cmake @@ -0,0 +1,9 @@ +set(CMAKE_CROSSCOMPILING TRUE) +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_PROCESSOR "riscv64") + +find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-clang) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From 46230a3a1ddf25e55171bc531243fc2c4f018ddc Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Thu, 2 Nov 2023 14:45:27 +0000 Subject: [PATCH 03/24] RIVOS: update rvv support to latest intrinsics - intrinsic functions are now prefixed with __riscv_ - vmerge/vfmerge argument order has changed --- src/arch/helperrvv.h | 612 +++++++++++++++++++++---------------------- 1 file changed, 306 insertions(+), 306 deletions(-) diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h index fafac723..18fe7642 100644 --- a/src/arch/helperrvv.h +++ b/src/arch/helperrvv.h @@ -93,68 +93,68 @@ typedef vint32m4_t dfi_t; #define SLEEF_RVV_DP_LMUL 1 #define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32) #define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64) -#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m1 -#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m1 -#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m1 -#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m1 -#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m1 -#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m2 -#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m4 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1 -#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m1 -#define SLEEF_RVV_SP_VGET_VI2 vget_i32m1 -#define SLEEF_RVV_SP_VGET_2VI vget_i32m2 -#define SLEEF_RVV_SP_VGET_VF vget_f32m1 -#define SLEEF_RVV_SP_VGET_VF2 vget_f32m2 -#define SLEEF_RVV_SP_VGET_4VF vget_f32m4 -#define SLEEF_RVV_SP_VGET_VU2 vget_u32m2 -#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m1 -#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m1 -#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1 -#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m1 -#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m1 -#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP) -#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32mf2 -#define 
SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m2 +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m1 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m1 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m2 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m1 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m1 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m1 +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m1 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32mf2 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m2 #define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ - vreinterpret_v_i64m2_i32m2(vreinterpret_i64m2(x)) + __riscv_vreinterpret_v_i64m2_i32m2(__riscv_vreinterpret_i64m2(x)) #define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ - vreinterpret_f64m2(vreinterpret_v_i32m2_i64m2(x)) -#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m4 + __riscv_vreinterpret_f64m2(__riscv_vreinterpret_v_i32m2_i64m2(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m4 #define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ - vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) #define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ - vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) -#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32mf2 -#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 -#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 -#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 -#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32mf2 -#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 -#define SLEEF_RVV_DP_VGET_VM vget_u64m1 -#define SLEEF_RVV_DP_VGET_VD vget_f64m1 -#define SLEEF_RVV_DP_VGET_VD2 vget_f64m2 -#define SLEEF_RVV_DP_VGET_4VD vget_f64m2 -#define SLEEF_RVV_DP_VGET_VI vget_i32m1 -#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 -#define 
SLEEF_RVV_DP_VGET_2VI vget_i32m1 -#define SLEEF_RVV_DP_VGET_4VI vget_i32m2 -#define SLEEF_RVV_DP_VGET_8VI vget_i32m4 -#define SLEEF_RVV_DP_VGET_VU vget_u32m1 -#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m1 -#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32mf2 + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m1 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m1 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2 #else @@ -176,68 +176,68 @@ typedef vint32m8_t dfi_t; #define SLEEF_RVV_DP_LMUL 2 #define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32) #define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64) -#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m2 -#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m2 -#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m4 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m2 -#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m4 -#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m8 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m2 -#define SLEEF_RVV_SP_VGET_VI2 vget_i32m2 -#define SLEEF_RVV_SP_VGET_2VI vget_i32m4 -#define SLEEF_RVV_SP_VGET_VF vget_f32m2 -#define SLEEF_RVV_SP_VGET_VF2 vget_f32m4 -#define SLEEF_RVV_SP_VGET_4VF vget_f32m8 -#define SLEEF_RVV_SP_VGET_VU2 vget_u32m4 -#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m2 -#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m2 -#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 -#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m2 -#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m2 -#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP) -#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32m1 -#define SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m4 +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m2 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m2 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m2 +#define 
SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m4 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m8 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m4 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m2 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m2 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m2 +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m2 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m4 #define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ - vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) #define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ - vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) -#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m8 + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m8 #define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ - vreinterpret_f64m8(vreinterpret_v_i32m8_i64m8(x)) + __riscv_vreinterpret_f64m8(__riscv_vreinterpret_v_i32m8_i64m8(x)) #define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ - vreinterpret_v_i64m8_i32m8(vreinterpret_i64m8(x)) -#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32m1 -#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 -#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 -#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 -#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32m1 -#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 -#define SLEEF_RVV_DP_VGET_VM vget_u64m2 -#define SLEEF_RVV_DP_VGET_VD vget_f64m2 -#define SLEEF_RVV_DP_VGET_VD2 vget_f64m4 -#define SLEEF_RVV_DP_VGET_4VD vget_f64m4 -#define SLEEF_RVV_DP_VGET_VI vget_i32m1 -#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 -#define SLEEF_RVV_DP_VGET_2VI vget_i32m2 -#define SLEEF_RVV_DP_VGET_4VI vget_i32m4 -#define SLEEF_RVV_DP_VGET_8VI vget_i32m8 -#define SLEEF_RVV_DP_VGET_VU vget_u32m1 -#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m2 -#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32m1 + __riscv_vreinterpret_v_i64m8_i32m8(__riscv_vreinterpret_i64m8(x)) +#define 
SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m2 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m8 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1 #endif // ENABLE_RVVM1 @@ -257,8 +257,8 @@ static INLINE vint2 figeti_vi2_di(fi_t d) { } static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { fi_t res; - res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); - res = vset(res, 1, i); + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); + res = __riscv_vset(res, 1, i); return res; } static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { @@ -269,12 +269,12 @@ static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { } static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { dfi_t res; - res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); - res = vset(res, 2, i); + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + res = __riscv_vset(res, 2, i); return res; } static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { - return vset(dfi, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + return __riscv_vset(dfi, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); } // vfloat2 type static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { @@ -285,21 +285,21 @@ static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { } static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 res; - res = vset(res, 0, x); - res = vset(res, 1, y); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); return res; } static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { - return vset(v, 0, d); + return __riscv_vset(v, 0, d); } static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { - return vset(v, 1, d); + return __riscv_vset(v, 1, d); } // df2 type static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { df2 res; - res = vset(res, 0, a); - res = vset(res, 1, b); + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); return res; } static vfloat2 df2geta_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 0); } @@ -322,13 +322,13 @@ static INLINE vfloat vrint_vf_vf(vfloat vd) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); - half = vfsgnj(half, vd, VECTLENSP); - vfloat res = vfadd(vd, half, VECTLENSP); - vint2 i = vfcvt_rtz_x(res, VECTLENSP); - return vfcvt_f(i, VECTLENSP); + half = __riscv_vfsgnj(half, vd, VECTLENSP); + vfloat 
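/* A scalar model of this add-half-and-truncate sequence (illustrative
 * sketch only; rint_emulated is a hypothetical helper, copysignf is from
 * <math.h>):
 *
 *   float rint_emulated(float x) {
 *     float half = copysignf(0.5f, x);     // vfsgnj
 *     return (float)(int32_t)(x + half);   // vfcvt_rtz_x, then vfcvt_f
 *   }
 *
 * Unlike frm-controlled rounding, this resolves ties away from zero:
 * 2.5f + 0.5f == 3.0f truncates to 3, where round-to-nearest-even would
 * give 2. */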
res = __riscv_vfadd(vd, half, VECTLENSP); + vint2 i = __riscv_vfcvt_rtz_x(res, VECTLENSP); + return __riscv_vfcvt_f(i, VECTLENSP); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { - return vfcvt_f(vi, VECTLENSP); + return __riscv_vfcvt_f(vi, VECTLENSP); } static INLINE vint2 vcast_vi2_i(int i) { return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP); @@ -337,12 +337,12 @@ static INLINE vint2 vrint_vi2_vf(vfloat vf) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); - half = vfsgnj(half, vf, VECTLENSP); - vfloat res = vfadd(vf, half, VECTLENSP); - return vfcvt_rtz_x(res, VECTLENSP); + half = __riscv_vfsgnj(half, vf, VECTLENSP); + vfloat res = __riscv_vfadd(vf, half, VECTLENSP); + return __riscv_vfcvt_rtz_x(res, VECTLENSP); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { - return vfcvt_rtz_x(vf, VECTLENSP); + return __riscv_vfcvt_rtz_x(vf, VECTLENSP); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return vcast_vf_vi2(vtruncate_vi2_vf(vf)); @@ -359,16 +359,16 @@ static INLINE vfloat vloadu_vf_p(const float *ptr) { return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { - vse32(ptr, v, VECTLENSP); + __riscv_vse32(ptr, v, VECTLENSP); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { - vse32(ptr, v, VECTLENSP); + __riscv_vse32(ptr, v, VECTLENSP); } static INLINE void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { - vse32(ptr, v, VECTLENSP); + __riscv_vse32(ptr, v, VECTLENSP); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { - return vluxei32(ptr, vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); + return __riscv_vluxei32(ptr, __riscv_vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); } @@ -376,58 +376,58 @@ static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { /* Floating-Point Arithmetic */ /****************************************/ static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { - return vfadd(x, y, VECTLENSP); + return __riscv_vfadd(x, y, VECTLENSP); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { - return vfsub(x, y, VECTLENSP); + return __riscv_vfsub(x, y, VECTLENSP); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { - return vfmul(x, y, VECTLENSP); + return __riscv_vfmul(x, y, VECTLENSP); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { - return vfdiv(x, y, VECTLENSP); + return __riscv_vfdiv(x, y, VECTLENSP); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { - return vfmax(x, y, VECTLENSP); + return __riscv_vfmax(x, y, VECTLENSP); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { - return vfmin(x, y, VECTLENSP); + return __riscv_vfmin(x, y, VECTLENSP); } static INLINE vfloat vrec_vf_vf(vfloat d) { - return vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); + return __riscv_vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { - return vfsqrt(d, VECTLENSP); + return __riscv_vfsqrt(d, VECTLENSP); } // fused multiply-add/subtract static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { - return vfmadd(x, y, z, VECTLENSP); + return __riscv_vfmadd(x, y, z, VECTLENSP); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { - return vfnmsub(x, y, z, VECTLENSP); + return __riscv_vfnmsub(x, y, z, VECTLENSP); } // sign manipulation static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { - return vfsgnjx(x, y, VECTLENSP); + return 
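/* RVV sign-injection maps one-to-one onto SLEEF's sign helpers:
 * vfsgnj(x, y) keeps x's magnitude and takes y's sign bit (copysign),
 * while vfsgnjx(x, y) XORs the two sign bits onto x's magnitude
 * (mulsign); for lanes holding x = -2.0f and y = -3.0f the result here
 * is +2.0f. */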
__riscv_vfsgnjx(x, y, VECTLENSP); } static INLINE vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { - return vfsgnj(x, y, VECTLENSP); + return __riscv_vfsgnj(x, y, VECTLENSP); } static INLINE vfloat vsign_vf_vf(vfloat f) { - return vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); + return __riscv_vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); } static INLINE vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { vint2 xi = SLEEF_RVV_SP_VREINTERPRET_VI2(x); vint2 yi = SLEEF_RVV_SP_VREINTERPRET_VI2(y); - vint2 xioryi = vor(xi, yi, VECTLENSP); + vint2 xioryi = __riscv_vor(xi, yi, VECTLENSP); vfloat xory = SLEEF_RVV_SP_VREINTERPRET_VF(xioryi); - return vfsgnj(x, xory, VECTLENSP); + return __riscv_vfsgnj(x, xory, VECTLENSP); } static INLINE vfloat vabs_vf_vf(vfloat f) { - return vfabs(f, VECTLENSP); + return __riscv_vfabs(f, VECTLENSP); } static INLINE vfloat vneg_vf_vf(vfloat f) { - return vfneg(f, VECTLENSP); + return __riscv_vfneg(f, VECTLENSP); } @@ -435,34 +435,34 @@ static INLINE vfloat vneg_vf_vf(vfloat f) { /* Integer Arithmetic and Logic */ /****************************************/ static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { - return vadd(x, y, VECTLENSP); + return __riscv_vadd(x, y, VECTLENSP); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { - return vsub(x, y, VECTLENSP); + return __riscv_vsub(x, y, VECTLENSP); } static INLINE vint2 vneg_vi2_vi2(vint2 x) { - return vneg(x, VECTLENSP); + return __riscv_vneg(x, VECTLENSP); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { - return vand(x, y, VECTLENSP); + return __riscv_vand(x, y, VECTLENSP); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { - return vand(vnot(x, VECTLENSP), y, VECTLENSP); + return __riscv_vand(__riscv_vnot(x, VECTLENSP), y, VECTLENSP); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { - return vor(x, y, VECTLENSP); + return __riscv_vor(x, y, VECTLENSP); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { - return vxor(x, y, VECTLENSP); + return __riscv_vxor(x, y, VECTLENSP); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { - return vsll(x, c, VECTLENSP); + return __riscv_vsll(x, c, VECTLENSP); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { - return vsra(x, c, VECTLENSP); + return __riscv_vsra(x, c, VECTLENSP); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { - return SLEEF_RVV_SP_VREINTERPRET_VI2(vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); + return SLEEF_RVV_SP_VREINTERPRET_VI2(__riscv_vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); } #ifdef ENABLE_RVV_SP @@ -470,37 +470,37 @@ static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { /* Bitmask Operations */ /****************************************/ static INLINE vfloat vreinterpret_vf_vm(vmask vm) { - return SLEEF_RVV_SP_VREINTERPRET_VF(vncvt_x(vm, VECTLENSP)); + return SLEEF_RVV_SP_VREINTERPRET_VF(__riscv_vncvt_x(vm, VECTLENSP)); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { - return vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); + return __riscv_vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); } static INLINE int vtestallones_i_vo32(vopmask g) { - return vcpop(g, VECTLENSP) == VECTLENSP; + return __riscv_vcpop(g, VECTLENSP) == VECTLENSP; } static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { return SLEEF_RVV_SP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENSP); } static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { - return vand(x, y, VECTLENSP); + return __riscv_vand(x, y, VECTLENSP); } static INLINE vmask 
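/* The single-precision vmask in this port holds 64 bits per lane, the
 * same layout as the double-precision mask, so the vfloat/vmask
 * reinterpretations above are a truncating narrow (__riscv_vncvt_x) and
 * a zero-extending widen (__riscv_vwcvtu_x) rather than plain
 * bit-casts. */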
vor_vm_vm_vm(vmask x, vmask y) { - return vor(x, y, VECTLENSP); + return __riscv_vor(x, y, VECTLENSP); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { - return vxor(x, y, VECTLENSP); + return __riscv_vxor(x, y, VECTLENSP); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { - return vand(SLEEF_RVV_SP_VREINTERPRET_VM(vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); + return __riscv_vand(SLEEF_RVV_SP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { - return vmerge(x, y, -1, VECTLENSP); + return __riscv_vmerge(y, -1, x, VECTLENSP); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { - return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { - return vmerge(x, y, 0, VECTLENSP); + return __riscv_vmerge(y, 0, x, VECTLENSP); } @@ -508,75 +508,75 @@ static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { /* Logical Mask Operations */ /****************************************/ static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { - return vmand(x, y, VECTLENSP); + return __riscv_vmand(x, y, VECTLENSP); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { - return vmandn(y, x, VECTLENSP); + return __riscv_vmandn(y, x, VECTLENSP); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { - return vmor(x, y, VECTLENSP); + return __riscv_vmor(x, y, VECTLENSP); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { - return vmxor(x, y, VECTLENSP); + return __riscv_vmxor(x, y, VECTLENSP); } // single precision FP comparison static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { - return vmfeq(x, y, VECTLENSP); + return __riscv_vmfeq(x, y, VECTLENSP); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { - return vmfne(x, y, VECTLENSP); + return __riscv_vmfne(x, y, VECTLENSP); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { - return vmfgt(x, y, VECTLENSP); + return __riscv_vmfgt(x, y, VECTLENSP); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { - return vmfge(x, y, VECTLENSP); + return __riscv_vmfge(x, y, VECTLENSP); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { - return vmflt(x, y, VECTLENSP); + return __riscv_vmflt(x, y, VECTLENSP); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { - return vmfle(x, y, VECTLENSP); + return __riscv_vmfle(x, y, VECTLENSP); } static INLINE vopmask visnan_vo_vf(vfloat d) { - return vmfne(d, d, VECTLENSP); + return __riscv_vmfne(d, d, VECTLENSP); } static INLINE vopmask visinf_vo_vf(vfloat d) { - return vmfeq(vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); + return __riscv_vmfeq(__riscv_vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); } static INLINE vopmask vispinf_vo_vf(vfloat d) { - return vmfeq(d, SLEEF_INFINITYf, VECTLENSP); + return __riscv_vmfeq(d, SLEEF_INFINITYf, VECTLENSP); } // conditional select static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { - return vmerge(mask, y, x, VECTLENSP); + return __riscv_vmerge(y, x, mask, VECTLENSP); } static INLINE vfloat vsel_vf_vo_f_f(vopmask mask, float v1, float v0) { - return vfmerge(mask, vcast_vf_f(v0), v1, VECTLENSP); + return __riscv_vfmerge(vcast_vf_f(v0), v1, mask, VECTLENSP); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { - return vfmerge(o0, vfmerge(o1, vcast_vf_f(d2), d1, VECTLENSP), d0, 
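/* In the updated API, __riscv_vmerge(y, x, m, vl) and
 * __riscv_vfmerge(dest, scalar, m, vl) select per lane as m ? x : y
 * (resp. m ? scalar : dest), so the nested selectors below read
 * inside-out: vsel_vf_vo_vo_f_f_f(o0, o1, d0, d1, d2) evaluates to
 * o0 ? d0 : (o1 ? d1 : d2) in each lane. */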
VECTLENSP); + return __riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d2), d1, o1, VECTLENSP), d0, o0, VECTLENSP); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { - return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vf_f(d3), d2, VECTLENSP), d1, VECTLENSP), d0, VECTLENSP); + return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d3), d2, o2, VECTLENSP), d1, o1, VECTLENSP), d0, o0, VECTLENSP); } // integer comparison static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { - return vmseq(x, y, VECTLENSP); + return __riscv_vmseq(x, y, VECTLENSP); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { - return vmsgt(x, y, VECTLENSP); + return __riscv_vmsgt(x, y, VECTLENSP); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 zero = vcast_vi2_i(0); - return vmerge(vmsgt(x, y, VECTLENSP), zero, -1, VECTLENSP); + return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENSP), VECTLENSP); } // integer conditional select static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { - return vmerge(m, y, x, VECTLENSP); + return __riscv_vmerge(y, x, m, VECTLENSP); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { - return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); } #endif // ENABLE_RVV_SP @@ -597,21 +597,21 @@ static INLINE const vdouble vd2gety_vd_vd2(vdouble2 v) { } static INLINE const vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { vdouble2 res; - res = vset(res, 0, x); - res = vset(res, 1, y); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); return res; } static INLINE const vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { - return vset(v, 0, d); + return __riscv_vset(v, 0, d); } static INLINE const vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { - return vset(v, 1, d); + return __riscv_vset(v, 1, d); } // dd2 type static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { dd2 res; - res = vset(res, 0, a); - res = vset(res, 1, b); + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); return res; } static vdouble2 dd2geta_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 0); } @@ -622,32 +622,32 @@ static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 2); } static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { vdouble3 res; - res = vset(res, 0, x); - res = vset(res, 1, y); - res = vset(res, 2, z); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + res = __riscv_vset(res, 2, z); return res; } -static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 0, d); } -static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 1, d); } -static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 2, d); } +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 2, d); } // di type static INLINE vdouble digetd_vd_di(di_t d) { return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); } static INLINE vint digeti_vi_di(di_t d) { #ifdef ENABLE_RVVM1 - return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); + return 
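/* In the RVVM1 configuration vint is an i32mf2 vector while a slot of
 * the wider register group is a full m1 register, so the integer part
 * of di_t is widened with __riscv_vlmul_ext_i32m1 when stored and
 * narrowed back with __riscv_vlmul_trunc_i32mf2 when read; RVVM2 keeps
 * vint at m1 and indexes its slot directly. */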
__riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); #else return SLEEF_RVV_DP_VGET_VI(d, 2); #endif } static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { di_t res; - res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); + res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); #ifdef ENABLE_RVVM1 - res = vset(res, 1, vlmul_ext_i32m1(i)); + res = __riscv_vset(res, 1, __riscv_vlmul_ext_i32m1(i)); #else - res = vset(res, 2, i); + res = __riscv_vset(res, 2, i); #endif return res; } @@ -657,23 +657,23 @@ static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { } static INLINE vint ddigeti_vi_ddi(ddi_t d) { #ifdef ENABLE_RVVM1 - return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); + return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); #else return SLEEF_RVV_DP_VGET_VI(d, 4); #endif } static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { ddi_t res; - res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); + res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); #ifdef ENABLE_RVVM1 - res = vset(res, 2, vlmul_ext_i32m1(i)); + res = __riscv_vset(res, 2, __riscv_vlmul_ext_i32m1(i)); #else - res = vset(res, 4, i); + res = __riscv_vset(res, 4, i); #endif return res; } static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { - return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); + return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); } /****************************************/ @@ -692,20 +692,20 @@ static INLINE vint vrint_vi_vd(vdouble vd) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); - half = vfsgnj(half, vd, VECTLENDP); - vdouble res = vfadd(vd, half, VECTLENDP); - return vfncvt_rtz_x(res, VECTLENDP); + half = __riscv_vfsgnj(half, vd, VECTLENDP); + vdouble res = __riscv_vfadd(vd, half, VECTLENDP); + return __riscv_vfncvt_rtz_x(res, VECTLENDP); } static INLINE vdouble vrint_vd_vd(vdouble vd) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); - half = vfsgnj(half, vd, VECTLENDP); - vdouble res = vfadd(vd, half, VECTLENDP); - return vfwcvt_f(vfncvt_rtz_x(res, VECTLENDP), VECTLENDP); + half = __riscv_vfsgnj(half, vd, VECTLENDP); + vdouble res = __riscv_vfadd(vd, half, VECTLENDP); + return __riscv_vfwcvt_f(__riscv_vfncvt_rtz_x(res, VECTLENDP), VECTLENDP); } static INLINE vint vtruncate_vi_vd(vdouble vd) { - return vfncvt_rtz_x(vd, VECTLENDP); + return __riscv_vfncvt_rtz_x(vd, VECTLENDP); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); @@ -725,16 +725,16 @@ static INLINE vint vloadu_vi_p(int32_t *p) { return SLEEF_RVV_DP_LOAD_VI(p, VECTLENDP); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { - vse64(ptr, v, VECTLENDP); + __riscv_vse64(ptr, v, VECTLENDP); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { - vse64(ptr, v, VECTLENDP); + __riscv_vse64(ptr, v, VECTLENDP); } static INLINE void vstoreu_v_p_vi(int32_t *ptr, vint v) { - vse32(ptr, v, VECTLENDP); + __riscv_vse32(ptr, v, VECTLENDP); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { - return vluxei64(ptr, vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), 
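/* Note on the gather below: vluxei64 takes unsigned byte offsets rather
 * than element indices, so the 32-bit indices are zero-extended and
 * scaled by sizeof(double) in a single widening multiply
 * (__riscv_vwmulu) before the indexed load. */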
sizeof(double), VECTLENDP), VECTLENDP); + return __riscv_vluxei64(ptr, __riscv_vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), sizeof(double), VECTLENDP), VECTLENDP); } @@ -742,60 +742,60 @@ static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { /* Floating-Point Arithmetic */ /****************************************/ static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { - return vfadd(x, y, VECTLENDP); + return __riscv_vfadd(x, y, VECTLENDP); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { - return vfsub(x, y, VECTLENDP); + return __riscv_vfsub(x, y, VECTLENDP); } static INLINE vdouble vrec_vd_vd(vdouble d) { - return vfdiv(vcast_vd_d(1.0), d, VECTLENDP); + return __riscv_vfdiv(vcast_vd_d(1.0), d, VECTLENDP); } static INLINE vdouble vabs_vd_vd(vdouble d) { - return vfabs(d, VECTLENDP); + return __riscv_vfabs(d, VECTLENDP); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { - return vfsqrt(d, VECTLENDP); + return __riscv_vfsqrt(d, VECTLENDP); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { - return vfmul(x, y, VECTLENDP); + return __riscv_vfmul(x, y, VECTLENDP); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { - return vfdiv(x, y, VECTLENDP); + return __riscv_vfdiv(x, y, VECTLENDP); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { - return vfmax(x, y, VECTLENDP); + return __riscv_vfmax(x, y, VECTLENDP); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { - return vfmin(x, y, VECTLENDP); + return __riscv_vfmin(x, y, VECTLENDP); } // fused multiply add / sub static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmadd(x, y, z, VECTLENDP); + return __riscv_vfmadd(x, y, z, VECTLENDP); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmsub(x, y, z, VECTLENDP); + return __riscv_vfmsub(x, y, z, VECTLENDP); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmadd(x, y, z, VECTLENDP); + return __riscv_vfmadd(x, y, z, VECTLENDP); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfnmsub(x, y, z, VECTLENDP); + return __riscv_vfnmsub(x, y, z, VECTLENDP); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmsub(x, y, z, VECTLENDP); + return __riscv_vfmsub(x, y, z, VECTLENDP); } // sign manipulation static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { - return vfsgnjx(x, y, VECTLENDP); + return __riscv_vfsgnjx(x, y, VECTLENDP); } static INLINE vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { - return vfsgnj(x, y, VECTLENDP); + return __riscv_vfsgnj(x, y, VECTLENDP); } static INLINE vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { - return vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); + return __riscv_vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(__riscv_vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); } static INLINE vdouble vneg_vd_vd(vdouble d) { - return vfneg(d, VECTLENDP); + return __riscv_vfneg(d, VECTLENDP); } @@ -803,34 +803,34 @@ static INLINE vdouble vneg_vd_vd(vdouble d) { /* Integer Arithmetic and Logic */ /****************************************/ static INLINE vint vadd_vi_vi_vi(vint x, vint y) { - return vadd(x, y, VECTLENDP); + return __riscv_vadd(x, y, VECTLENDP); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { - return vsub(x, y, VECTLENDP); + return __riscv_vsub(x, y, VECTLENDP); } static INLINE 
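/* The three ternary RVV forms used for the fused operations above each
 * compute with a single rounding and cover every SLEEF flavour:
 *   __riscv_vfmadd(x, y, z, vl)  = x*y + z   (vmla, vfma)
 *   __riscv_vfmsub(x, y, z, vl)  = x*y - z   (vmlapn, vfmapn)
 *   __riscv_vfnmsub(x, y, z, vl) = z - x*y   (vfmanp) */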
vint vneg_vi_vi(vint x) { - return vneg(x, VECTLENDP); + return __riscv_vneg(x, VECTLENDP); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { - return vand(x, y, VECTLENDP); + return __riscv_vand(x, y, VECTLENDP); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { - return vand(vnot(x, VECTLENDP), y, VECTLENDP); + return __riscv_vand(__riscv_vnot(x, VECTLENDP), y, VECTLENDP); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { - return vor(x, y, VECTLENDP); + return __riscv_vor(x, y, VECTLENDP); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { - return vxor(x, y, VECTLENDP); + return __riscv_vxor(x, y, VECTLENDP); } static INLINE vint vsll_vi_vi_i(vint x, int c) { - return vsll(x, c, VECTLENDP); + return __riscv_vsll(x, c, VECTLENDP); } static INLINE vint vsra_vi_vi_i(vint x, int c) { - return vsra(x, c, VECTLENDP); + return __riscv_vsra(x, c, VECTLENDP); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { - return SLEEF_RVV_DP_VREINTERPRET_VI(vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); } @@ -848,55 +848,55 @@ static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { return SLEEF_RVV_DP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENDP); } static INLINE vmask vcast_vm_vi(vint vi) { - return SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP)); } static INLINE vmask vcastu_vm_vi(vint vi) { - return vsll(SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP); + return __riscv_vsll(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP); } static INLINE vint vcastu_vi_vm(vmask vm) { - return SLEEF_RVV_DP_VREINTERPRET_VI(vnsrl(vm, 32, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vnsrl(vm, 32, VECTLENDP)); } static INLINE vint vcast_vi_vm(vmask vm) { - return SLEEF_RVV_DP_VREINTERPRET_VI(vncvt_x(vm, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vncvt_x(vm, VECTLENDP)); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { - return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP); } static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { - return vand(x, y, VECTLENDP); + return __riscv_vand(x, y, VECTLENDP); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { - return vor(x, y, VECTLENDP); + return __riscv_vor(x, y, VECTLENDP); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { - return vxor(x, y, VECTLENDP); + return __riscv_vxor(x, y, VECTLENDP); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { - return vand(SLEEF_RVV_DP_VREINTERPRET_VM(vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP); + return __riscv_vand(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { - return vmerge(x, y, 0, VECTLENDP); + return __riscv_vmerge(y, 0, x, VECTLENDP); } static INLINE vmask vsll64_vm_vm_i(vmask mask, int64_t c) { - return vsll(mask, c, VECTLENDP); + return __riscv_vsll(mask, c, VECTLENDP); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { - return SLEEF_RVV_DP_VREINTERPRET_VM(vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP)); } static INLINE vmask 
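/* vcastu_vm_vi and vcastu_vi_vm above park a 32-bit lane in the upper
 * half of a 64-bit mask lane: widening 0x12345678 yields
 * 0x0000000012345678, the shift left by 32 moves it to
 * 0x1234567800000000, and the narrowing right shift by 32
 * (__riscv_vnsrl) recovers 0x12345678. */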
vsrl64_vm_vm_i(vmask mask, int64_t c) { - return vsrl(mask, c, VECTLENDP); + return __riscv_vsrl(mask, c, VECTLENDP); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { - return vadd(x, y, VECTLENDP); + return __riscv_vadd(x, y, VECTLENDP); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { - return vmerge(x, y, -1, VECTLENDP); + return __riscv_vmerge(y, -1, x, VECTLENDP); } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask mask, vmask x, vmask y) { - return vmerge(mask, y, x, VECTLENDP); + return __riscv_vmerge(y, x, mask, VECTLENDP); } static INLINE vmask vneg64_vm_vm(vmask mask) { - return SLEEF_RVV_DP_VREINTERPRET_VM(vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP)); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return SLEEF_RVV_DP_VREINTERPRET_VD(vm); @@ -910,12 +910,12 @@ static INLINE const vmask vqgetx_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, static INLINE const vmask vqgety_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 1); } static INLINE vquad vqsetxy_vq_vm_vm(vmask x, vmask y) { vquad res; - res = vset(res, 0, x); - res = vset(res, 1, y); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); return res; } -static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return vset(v, 0, x); } -static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return vset(v, 1, y); } +static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return __riscv_vset(v, 0, x); } +static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return __riscv_vset(v, 1, y); } @@ -929,87 +929,87 @@ static INLINE vopmask vcast_vo32_vo64(vopmask vo) { return vo; } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { - return vmand(x, y, VECTLENDP); + return __riscv_vmand(x, y, VECTLENDP); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { - return vmandn(y, x, VECTLENDP); + return __riscv_vmandn(y, x, VECTLENDP); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { - return vmor(x, y, VECTLENDP); + return __riscv_vmor(x, y, VECTLENDP); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { - return vmxor(x, y, VECTLENDP); + return __riscv_vmxor(x, y, VECTLENDP); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { - return vmseq(x, y, VECTLENDP); + return __riscv_vmseq(x, y, VECTLENDP); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { - return vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP); + return __riscv_vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP); } // double-precision comparison static INLINE vopmask visinf_vo_vd(vdouble d) { - return vmfeq(vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP); + return __riscv_vmfeq(__riscv_vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP); } static INLINE vopmask vispinf_vo_vd(vdouble d) { - return vmfeq(d, SLEEF_INFINITY, VECTLENDP); + return __riscv_vmfeq(d, SLEEF_INFINITY, VECTLENDP); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { - return vmfeq(x, y, VECTLENDP); + return __riscv_vmfeq(x, y, VECTLENDP); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { - return vmfne(x, y, VECTLENDP); + return __riscv_vmfne(x, y, VECTLENDP); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { - return vmflt(x, y, VECTLENDP); + return __riscv_vmflt(x, y, VECTLENDP); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { - return vmfle(x, y, VECTLENDP); + return __riscv_vmfle(x, y, 
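/* The floating-point classification tests in this section rely on
 * IEEE-754 identities instead of a classify instruction: x != x holds
 * exactly for NaN (vmfne(d, d) in visnan), and |x| == +inf holds
 * exactly for the two infinities (vmfeq(vfabs(d), SLEEF_INFINITY) in
 * visinf). */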
VECTLENDP); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { - return vmfgt(x, y, VECTLENDP); + return __riscv_vmfgt(x, y, VECTLENDP); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { - return vmfge(x, y, VECTLENDP); + return __riscv_vmfge(x, y, VECTLENDP); } static INLINE vopmask visnan_vo_vd(vdouble d) { - return vmfne(d, d, VECTLENDP); + return __riscv_vmfne(d, d, VECTLENDP); } // double-precision conditional select static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { - return vmerge(mask, y, x, VECTLENDP); + return __riscv_vmerge(y, x, mask, VECTLENDP); } static INLINE vdouble vsel_vd_vo_d_d(vopmask mask, double v0, double v1) { - return vfmerge(mask, vcast_vd_d(v1), v0, VECTLENDP); + return __riscv_vfmerge(vcast_vd_d(v1), v0, mask, VECTLENDP); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { - return vfmerge(o0, vfmerge(o1, vcast_vd_d(d2), d1, VECTLENDP), d0, VECTLENDP); + return __riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d2), d1, o1, VECTLENDP), d0, o0, VECTLENDP); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { - return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vd_d(d3), d2, VECTLENDP), d1, VECTLENDP), d0, VECTLENDP); + return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d3), d2, o2, VECTLENDP), d1, o1, VECTLENDP), d0, o0, VECTLENDP); } static INLINE int vtestallones_i_vo64(vopmask g) { - return vcpop(g, VECTLENDP) == VECTLENDP; + return __riscv_vcpop(g, VECTLENDP) == VECTLENDP; } // integer comparison static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { - return vmseq(x, y, VECTLENDP); + return __riscv_vmseq(x, y, VECTLENDP); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { - return vmsgt(x, y, VECTLENDP); + return __riscv_vmsgt(x, y, VECTLENDP); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { vint zero = vcast_vi_i(0); - return vmerge(vmsgt(x, y, VECTLENDP), zero, -1, VECTLENDP); + return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENDP), VECTLENDP); } // integer conditional select static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { - return vmerge(m, y, x, VECTLENDP); + return __riscv_vmerge(y, x, m, VECTLENDP); } static INLINE vint vandnot_vi_vo_vi(vopmask mask, vint vi) { - return vmerge(mask, vi, 0, VECTLENDP); + return __riscv_vmerge(vi, 0, mask, VECTLENDP); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { - return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP); } #endif // ENABLE_RVV_DP From 55fd4530f756bdad9971212634952e4c3f07663d Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 4 Nov 2023 11:06:45 +0000 Subject: [PATCH 04/24] Add riscv64 CI on GitHub Actions --- .github/workflows/build_and_test.yml | 36 ++++++++++++++++++++++++-- Configure.cmake | 38 ++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3540b600..158a091d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -33,8 +33,8 @@ jobs: run: sudo apt-get update -y -qq && sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev - name: Build native + shell: bash -ex -o pipefail {0} run: | - set -x EXTRA_CMAKE_FLAGS="-DENFORCE_SSE2=ON -DENFORCE_SSE4=ON -DENFORCE_AVX=ON -DENFORCE_AVX=ON -DENFORCE_AVX2=ON 
-DENFORCE_AVX512F=ON -DENFORCE_FMA4=ON" cmake -S . -B _build-native -GNinja \ -DCMAKE_INSTALL_PREFIX=$(pwd)/_install-native \ @@ -108,6 +108,8 @@ jobs: package: -powerpc64le-linux-gnu # IBM Z - arch: s390x + # RISC-V + - arch: riscv64 name: build-${{ matrix.arch }} steps: @@ -120,6 +122,14 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.package || format('-{0}-linux-gnu', matrix.arch) }} + - name: Download riscv-gnu-toolchain's LLVM build + env: + RISCV_GNU_TOOLCHAIN_TAG: "2023.10.18" + run: | + curl -L https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-llvm-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | + tar xzf - -C /opt + if: ${{ matrix.arch == 'riscv64' }} + - name: Download build-native artifacts uses: actions/download-artifact@v3 with: @@ -130,8 +140,13 @@ jobs: chmod +x _build-native/bin/* - name: Build ${{ matrix.arch }} + shell: bash -ex -o pipefail {0} run: | - set -x + # Add riscv-gnu-toolchain to PATH + if [[ ${{ matrix.arch }} = "riscv64" ]]; then + export PATH="/opt/riscv/bin:$PATH" + fi + EXTRA_CMAKE_FLAGS="" if [[ ${{ matrix.arch }} = "aarch64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_SVE=ON" @@ -144,6 +159,14 @@ jobs: EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON" # Disable VXE2 support, QEMU doesn't support it EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" + elif [[ ${{ matrix.arch }} = "riscv64" ]]; then + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON" + # Disable inline headers, they just don't compile on riscv64 + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_INLINE_HEADERS=OFF" + # Disable dft, it fails with linker error to `cexp` + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_DFT=OFF" + # Disable quad, it's missing the `Sleef_quad` function + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_QUAD=OFF" fi cmake -S . 
-B _build-${{ matrix.arch }} -GNinja \
           -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \
           -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}.cmake \
           -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \
           ${COMMON_CMAKE_FLAGS} \
           ${EXTRA_CMAKE_FLAGS}
         cmake --build _build-${{ matrix.arch }}
         cmake --install _build-${{ matrix.arch }}

@@ -190,6 +213,15 @@ jobs:
           # IBM Z
           # TODO: figure out qemu_cpu variable to make tests pass on QEMU
           - arch: s390x
+          # RISC-V
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false"
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0"
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0"
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0"

     name: "test-${{ matrix.arch }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")"
     steps:
diff --git a/Configure.cmake b/Configure.cmake
index 25392d09..6cb9945f 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -121,8 +121,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector")
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
   set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.")
-  set(COMPILER_SUPPORTS_RVVM1 1)
-  set(COMPILER_SUPPORTS_RVVM2 1)
 endif()

 set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
@@ -623,6 +621,42 @@ if (ENFORCE_VXE2 AND NOT COMPILER_SUPPORTS_VXE2)
   message(FATAL_ERROR "ENFORCE_VXE2 is specified and that feature is disabled or not supported by the compiler")
 endif()

+# RVVM1
+
+option(DISABLE_RVVM1 "Disable RVVM1" OFF)
+option(ENFORCE_RVVM1 "Build fails if RVVM1 is not supported by the compiler" OFF)
+
+if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1)
+  string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM1}")
+  CHECK_C_SOURCE_COMPILES("
+  #include <riscv_vector.h>
+  int main() {
+    vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_v_min_vlen / 32); }"
+  COMPILER_SUPPORTS_RVVM1)
+endif()
+
+if (ENFORCE_RVVM1 AND NOT COMPILER_SUPPORTS_RVVM1)
+  message(FATAL_ERROR "ENFORCE_RVVM1 is specified and that feature is disabled or not supported by the compiler")
+endif()
+
+# RVVM2
+
+option(DISABLE_RVVM2 "Disable RVVM2" OFF)
+option(ENFORCE_RVVM2 "Build fails if RVVM2 is not supported by the compiler" OFF)
+
+if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2)
+  string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM2}")
+  CHECK_C_SOURCE_COMPILES("
+  #include <riscv_vector.h>
+  int main() {
+    vint32m2_t r = __riscv_vmv_v_x_i32m2(1, __riscv_v_min_vlen / 32); }"
+  COMPILER_SUPPORTS_RVVM2)
+endif()
+
+if (ENFORCE_RVVM2 AND NOT COMPILER_SUPPORTS_RVVM2)
+  message(FATAL_ERROR "ENFORCE_RVVM2 is specified and that feature is disabled or not supported by the compiler")
+endif()
+
 # CUDA

 option(ENFORCE_CUDA "Build fails if CUDA is not supported" OFF)
From 8339480189e0504a65862c0e48a92156abe53d6c Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Mon, 13 Nov 2023 11:11:30 +0000
Subject: [PATCH 05/24] Add gcc and llvm builds

---
 .github/workflows/build_and_test.yml | 181 +++++++++++++-----
 travis/before_script.aarch64-gcc.sh | 2 +-
 travis/before_script.armhf-gcc.sh | 2 +-
 ...ch64.cmake => toolchain-aarch64-gcc.cmake} | 0
 travis/toolchain-aarch64-llvm.cmake | 12 ++
 ...-armhf.cmake => toolchain-armhf-gcc.cmake} | 0
 travis/toolchain-armhf-llvm.cmake | 12 ++
 travis/toolchain-native-gcc.cmake | 1 +
 travis/toolchain-native-llvm.cmake | 1 +
 ...64el.cmake => toolchain-ppc64el-gcc.cmake} | 0
 travis/toolchain-ppc64el-llvm.cmake | 14 ++
 ...cv64.cmake => toolchain-riscv64-gcc.cmake} | 2 +-
 travis/toolchain-riscv64-llvm.cmake | 12 ++
 ...-s390x.cmake => toolchain-s390x-gcc.cmake} | 0
 travis/toolchain-s390x-llvm.cmake | 12 ++
 15 files changed, 204 insertions(+), 47 
deletions(-) rename travis/{toolchain-aarch64.cmake => toolchain-aarch64-gcc.cmake} (100%) create mode 100644 travis/toolchain-aarch64-llvm.cmake rename travis/{toolchain-armhf.cmake => toolchain-armhf-gcc.cmake} (100%) create mode 100644 travis/toolchain-armhf-llvm.cmake create mode 100644 travis/toolchain-native-gcc.cmake create mode 100644 travis/toolchain-native-llvm.cmake rename travis/{toolchain-ppc64el.cmake => toolchain-ppc64el-gcc.cmake} (100%) create mode 100644 travis/toolchain-ppc64el-llvm.cmake rename travis/{toolchain-riscv64.cmake => toolchain-riscv64-gcc.cmake} (79%) create mode 100644 travis/toolchain-riscv64-llvm.cmake rename travis/{toolchain-s390x.cmake => toolchain-s390x-gcc.cmake} (100%) create mode 100644 travis/toolchain-s390x-llvm.cmake diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 158a091d..6f611be0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -7,6 +7,10 @@ on: push: pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read @@ -24,13 +28,21 @@ env: jobs: build-native: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + compiler: [gcc, llvm] + + name: build-native-${{ matrix.compiler }} steps: - uses: actions/checkout@v4.1.1 with: persist-credentials: false - name: Install dependencies - run: sudo apt-get update -y -qq && sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev + run: | + sudo apt-get update -y -qq + sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev - name: Build native shell: bash -ex -o pipefail {0} @@ -38,15 +50,16 @@ jobs: EXTRA_CMAKE_FLAGS="-DENFORCE_SSE2=ON -DENFORCE_SSE4=ON -DENFORCE_AVX=ON -DENFORCE_AVX=ON -DENFORCE_AVX2=ON -DENFORCE_AVX512F=ON -DENFORCE_FMA4=ON" cmake -S . 
-B _build-native -GNinja \ -DCMAKE_INSTALL_PREFIX=$(pwd)/_install-native \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-native-${{ matrix.compiler }}.cmake \ ${COMMON_CMAKE_FLAGS} \ ${EXTRA_CMAKE_FLAGS} cmake --build _build-native cmake --install _build-native - - name: Upload build-native artifacts + - name: Upload build-native-${{ matrix.compiler }} artifacts uses: actions/upload-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} path: | _build-* _install-* @@ -55,6 +68,12 @@ jobs: test-native: runs-on: ubuntu-latest needs: [build-native] + strategy: + fail-fast: false + matrix: + compiler: [gcc, llvm] + + name: test-native-${{ matrix.compiler }} steps: - uses: actions/checkout@v4.1.1 with: @@ -67,12 +86,12 @@ jobs: run: | cat /proc/cpuinfo - - name: Download build-native artifacts + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Fix build-native permissions + - name: Fix _build-native permissions run: | chmod +x _build-native/bin/* @@ -97,21 +116,40 @@ jobs: strategy: fail-fast: false matrix: + compiler: [gcc, llvm] + arch: [aarch64, armhf, ppc64el, s390x, riscv64] include: - # AArch64 - - arch: aarch64 - # Aarch32 - arch: armhf - package: -arm-linux-gnueabihf - # PPC64 + gnupkg: -arm-linux-gnueabihf - arch: ppc64el - package: -powerpc64le-linux-gnu - # IBM Z + gnupkg: -powerpc64le-linux-gnu + exclude: + # It fails with the following error: + # ``` + # FAILED: include/sleefinline_vsx3.h /home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h + # cd /home/runner/work/sleef/sleef/_build-ppc64el/src/libm && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -n -e "/^\\/\\/@#.*\$/p" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 && /usr/bin/sed -e "s/^\\/\\/@#/#/g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/macroonlyVSX3.h && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimdsp.c >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c && /usr/bin/clang -E /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 && /usr/bin/sed -e s/SLEEF_VERSION_SLEEF/3.6.0/g -e s/SLEEF_SIMD_SLEEF/VSX3/g /home/runner/work/sleef/sleef/src/libm/sleefinline_header.h.org > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^SLEEFSHARP/#/g" 
/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 && /usr/bin/sed -e s/SLEEFXXX//g /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 && /usr/bin/sed -e "s/^[[:space:]]*\$//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 && /usr/bin/sed "/^\$/N;/^\\n\$/D" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 && /home/runner/work/sleef/sleef/_build-native/bin/addSuffix /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 /home/runner/work/sleef/sleef/src/common/keywords.txt _vsx3_sleef Sleef_rempitabdp Sleef_rempitabsp > /home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h + # In file included from /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c:178: + # /home/runner/work/sleef/sleef/src/arch/helperpower_128.h:9:2: error: Please specify -mcpu=power8 or -mcpu=power9 + # #error Please specify -mcpu=power8 or -mcpu=power9 + # ``` + - arch: ppc64el + compiler: llvm + # It fails with the following error: + # ``` + # FAILED: src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o + # /usr/bin/clang --target=s390x-linux-gnu -DENABLE_ALIAS=1 -DENABLE_PUREC_SCALAR=1 -DENABLE_SYS_getrandom=1 -DMACRO_ONLY_HEADER=\"macroonlyPUREC_SCALAR.h\" -DSIMD_SUFFIX=_purec_scalar_sleef -DSLEEF_STATIC_LIBS=1 -DUSE_INLINE_HEADER=\"sleefinline_purec_scalar.h\" -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-s390x/include -I/home/runner/work/sleef/sleef/src/libm -I/home/runner/work/sleef/sleef/_build-s390x/src/libm/include -Wall -Wno-unused-function -Wno-attributes -Wno-unused-result -ffp-contract=off -fno-math-errno -fno-trapping-math -fno-strict-aliasing -O3 -DNDEBUG -std=gnu99 -MD -MT src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -MF src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o.d -o src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -c /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c + # In file included from /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c:65: + # /usr/lib/llvm-13/lib/clang/13.0.1/include/vecintrin.h:11125:2: error: "Use -fzvector to enable vector extensions" + # #error "Use -fzvector to enable vector extensions" + # ``` - arch: s390x - # RISC-V + compiler: llvm + # Only GCC trunk supports the RISC-V V intrinsics and https://github.com/riscv-collab/riscv-gnu-toolchain + # doesn't track a recent enough version yet - arch: riscv64 + compiler: gcc - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} steps: - uses: actions/checkout@v4.1.1 with: @@ -120,33 +158,38 @@ jobs: - name: Install dependencies run: | sudo apt-get update -y -qq - sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.package || format('-{0}-linux-gnu', matrix.arch) }} + sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} - - name: Download riscv-gnu-toolchain's LLVM build - env: - RISCV_GNU_TOOLCHAIN_TAG: "2023.10.18" + - name: Install gcc run: | - curl -L 
https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-llvm-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | + RISCV_GNU_TOOLCHAIN_TAG="2023.11.08" + curl -L https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-gcc-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | tar xzf - -C /opt - if: ${{ matrix.arch == 'riscv64' }} + echo "PATH=/opt/riscv/bin:$PATH" >> $GITHUB_ENV + if: ${{ matrix.compiler == 'gcc' && matrix.arch == 'riscv64' }} - - name: Download build-native artifacts + - name: Install llvm + run: | + LLVM_VERSION="17" + curl -o llvm.sh https://apt.llvm.org/llvm.sh + chmod u+x llvm.sh + sudo ./llvm.sh ${LLVM_VERSION} + sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang + rm llvm.sh + if: ${{ matrix.compiler == 'llvm' }} + + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Fix build-native permissions + - name: Fix _build-native permissions run: | chmod +x _build-native/bin/* - name: Build ${{ matrix.arch }} shell: bash -ex -o pipefail {0} run: | - # Add riscv-gnu-toolchain to PATH - if [[ ${{ matrix.arch }} = "riscv64" ]]; then - export PATH="/opt/riscv/bin:$PATH" - fi - EXTRA_CMAKE_FLAGS="" if [[ ${{ matrix.arch }} = "aarch64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_SVE=ON" @@ -157,8 +200,12 @@ jobs: EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VSX=ON -DENFORCE_VSX3=ON" elif [[ ${{ matrix.arch }} = "s390x" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON" - # Disable VXE2 support, QEMU doesn't support it - EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" + if [[ ${{ matrix.compiler }} = "gcc" ]]; then + # Disable VXE2 support, QEMU doesn't support some instructions generated by gcc + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" + else + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE2=ON" + fi elif [[ ${{ matrix.arch }} = "riscv64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON" # Disable inline headers, they just don't compile on riscv64 @@ -168,19 +215,20 @@ jobs: # Disable quad, it's missing the `Sleef_quad` function EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_QUAD=OFF" fi + cmake -S . 
-B _build-${{ matrix.arch }} -GNinja \ -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \ - -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}.cmake \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}-${{ matrix.compiler }}.cmake \ -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \ ${COMMON_CMAKE_FLAGS} \ ${EXTRA_CMAKE_FLAGS} cmake --build _build-${{ matrix.arch }} cmake --install _build-${{ matrix.arch }} - - name: Upload build-${{ matrix.arch }} artifacts + - name: Upload build-${{ matrix.arch }}-${{ matrix.compiler }} artifacts uses: actions/upload-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} path: | _build-${{ matrix.arch }} _install-${{ matrix.arch }} @@ -195,35 +243,81 @@ jobs: include: # AArch64 - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=off" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve128=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve256=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve512=on" + # Some tests fail when compiled with LLVM only + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=off" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve128=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve256=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve512=on" # Aarch32 - arch: armhf + compiler: gcc + binfmt: arm + qemu_cpu: "max" + - arch: armhf + compiler: llvm binfmt: arm qemu_cpu: "max" # PPC64 - arch: ppc64el + compiler: gcc binfmt: ppc64le qemu_cpu: "power10" + # - arch: ppc64el + # compiler: llvm + # binfmt: ppc64le + # qemu_cpu: "power10" # IBM Z # TODO: figure out qemu_cpu variable to make tests pass on QEMU - arch: s390x + compiler: gcc + # - arch: s390x + # compiler: llvm # RISC-V + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" - name: "test-${{ matrix.arch }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" + name: "test-${{ matrix.arch }}-${{ matrix.compiler }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" steps: - uses: actions/checkout@v4.1.1 with: @@ -240,20 +334,19 @@ jobs: run: | cat /proc/cpuinfo - - name: Download build-native artifacts + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Download build-${{ matrix.arch }} artifacts + - name: Download build-${{ matrix.arch }}-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} - - name: Fix build-native and _build-${{ matrix.arch }} 
permissions + - name: Fix _build-native and _build-${{ matrix.arch }} permissions run: | - chmod +x _build-native/bin/* - chmod +x _build-${{ matrix.arch }}/bin/* + chmod +x _build-native/bin/* _build-${{ matrix.arch }}/bin/* - name: Test ${{ matrix.arch }} env: @@ -265,10 +358,10 @@ jobs: cd _build-${{ matrix.arch }} ctest -j$(nproc) - - name: Upload test-${{ matrix.arch }}-${{ strategy.job-index }} artifacts + - name: Upload test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} artifacts uses: actions/upload-artifact@v3 with: - name: test-${{ matrix.arch }}-${{ strategy.job-index }} + name: test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} path: | _build-${{ matrix.arch }}/Testing if: always() diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh index 56c4c88c..f590a9db 100644 --- a/travis/before_script.aarch64-gcc.sh +++ b/travis/before_script.aarch64-gcc.sh @@ -8,5 +8,5 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh index 8c4bd4fa..464fa581 100644 --- a/travis/before_script.armhf-gcc.sh +++ b/travis/before_script.armhf-gcc.sh @@ -8,4 +8,4 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE .. 
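Aside: the riscv64 test entries above pin QEMU's vector length (vlen=128, 256, and 512) so the RVVM1/RVVM2 builds are exercised at several hardware vector widths. A minimal standalone probe such as the following can confirm that a given QEMU configuration (or a real board) actually exposes the intended VLEN. This is an illustrative sketch, not part of the patch series; the file name vlen_probe.c is hypothetical, and it assumes a toolchain providing the __riscv_-prefixed RVV intrinsics, the same requirement the helper code in this series has.

/* vlen_probe.c: print the runtime RVV vector length.
 * Build: riscv64-linux-gnu-gcc -march=rv64gcv -static vlen_probe.c -o vlen_probe
 * Run:   qemu-riscv64 -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./vlen_probe
 */
#include <riscv_vector.h>
#include <stdio.h>

int main(void) {
  /* VLMAX at SEW=64, LMUL=1 is VLEN/64, i.e. elements per vector register */
  size_t vlmax = __riscv_vsetvlmax_e64m1();
  printf("VLMAX(e64, m1) = %zu -> VLEN = %zu bits\n", vlmax, vlmax * 64);
  return 0;
}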
diff --git a/travis/toolchain-aarch64.cmake b/travis/toolchain-aarch64-gcc.cmake similarity index 100% rename from travis/toolchain-aarch64.cmake rename to travis/toolchain-aarch64-gcc.cmake diff --git a/travis/toolchain-aarch64-llvm.cmake b/travis/toolchain-aarch64-llvm.cmake new file mode 100644 index 00000000..d9c11ae0 --- /dev/null +++ b/travis/toolchain-aarch64-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "aarch64") + +SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET aarch64-linux-gnu) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-armhf.cmake b/travis/toolchain-armhf-gcc.cmake similarity index 100% rename from travis/toolchain-armhf.cmake rename to travis/toolchain-armhf-gcc.cmake diff --git a/travis/toolchain-armhf-llvm.cmake b/travis/toolchain-armhf-llvm.cmake new file mode 100644 index 00000000..6c157289 --- /dev/null +++ b/travis/toolchain-armhf-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "armhf") + +SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-native-gcc.cmake b/travis/toolchain-native-gcc.cmake new file mode 100644 index 00000000..07ea294d --- /dev/null +++ b/travis/toolchain-native-gcc.cmake @@ -0,0 +1 @@ +find_program(CMAKE_C_COMPILER gcc) diff --git a/travis/toolchain-native-llvm.cmake b/travis/toolchain-native-llvm.cmake new file mode 100644 index 00000000..6f8e7121 --- /dev/null +++ b/travis/toolchain-native-llvm.cmake @@ -0,0 +1 @@ +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) diff --git a/travis/toolchain-ppc64el.cmake b/travis/toolchain-ppc64el-gcc.cmake similarity index 100% rename from travis/toolchain-ppc64el.cmake rename to travis/toolchain-ppc64el-gcc.cmake diff --git a/travis/toolchain-ppc64el-llvm.cmake b/travis/toolchain-ppc64el-llvm.cmake new file mode 100644 index 00000000..531b36f3 --- /dev/null +++ b/travis/toolchain-ppc64el-llvm.cmake @@ -0,0 +1,14 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "ppc64") + +SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu) + +SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-riscv64.cmake b/travis/toolchain-riscv64-gcc.cmake similarity index 79% rename from travis/toolchain-riscv64.cmake rename to travis/toolchain-riscv64-gcc.cmake index bb7b4977..fb7dc230 100644 --- a/travis/toolchain-riscv64.cmake +++ 
b/travis/toolchain-riscv64-gcc.cmake @@ -2,7 +2,7 @@ set(CMAKE_CROSSCOMPILING TRUE) set(CMAKE_SYSTEM_NAME "Linux") set(CMAKE_SYSTEM_PROCESSOR "riscv64") -find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-clang) +find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-riscv64-llvm.cmake b/travis/toolchain-riscv64-llvm.cmake new file mode 100644 index 00000000..1821770a --- /dev/null +++ b/travis/toolchain-riscv64-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "riscv64") + +SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang) +set(CMAKE_C_COMPILER_TARGET riscv64-linux-gnu) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-s390x.cmake b/travis/toolchain-s390x-gcc.cmake similarity index 100% rename from travis/toolchain-s390x.cmake rename to travis/toolchain-s390x-gcc.cmake diff --git a/travis/toolchain-s390x-llvm.cmake b/travis/toolchain-s390x-llvm.cmake new file mode 100644 index 00000000..ca5e9687 --- /dev/null +++ b/travis/toolchain-s390x-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "s390x") + +SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET s390x-linux-gnu) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From 3b905a6a213ddf478e3066cb0725378b91ea6b7d Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Mon, 13 Nov 2023 13:41:49 +0000 Subject: [PATCH 06/24] Enable bitmanip extensions on RISC-V --- Configure.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Configure.cmake b/Configure.cmake index 6cb9945f..7ee52250 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -166,8 +166,8 @@ set(CLANG_FLAGS_ENABLE_VXENOFMA "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector") # RISC-V -set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv") -set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv") +set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv_zba_zbb_zbs") set(FLAGS_OTHERS "") From 798b14f6d135f67b45b8ae6cca9aa51c9887a543 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Mon, 13 Nov 2023 18:12:30 +0000 Subject: [PATCH 07/24] Enable LLVM build for s390x and ppc64el --- .github/workflows/build_and_test.yml | 40 ++++++---------------------- Configure.cmake | 1 + src/libm/CMakeLists.txt | 6 +++++ src/quad/CMakeLists.txt | 5 ++++ 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6f611be0..52fa8185 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -124,26 +124,6 @@ jobs: - arch: ppc64el gnupkg: -powerpc64le-linux-gnu exclude: - # It fails with the following error: - # ``` - # FAILED: include/sleefinline_vsx3.h 
/home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h - # cd /home/runner/work/sleef/sleef/_build-ppc64el/src/libm && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -n -e "/^\\/\\/@#.*\$/p" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 && /usr/bin/sed -e "s/^\\/\\/@#/#/g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/macroonlyVSX3.h && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimdsp.c >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c && /usr/bin/clang -E /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 && /usr/bin/sed -e s/SLEEF_VERSION_SLEEF/3.6.0/g -e s/SLEEF_SIMD_SLEEF/VSX3/g /home/runner/work/sleef/sleef/src/libm/sleefinline_header.h.org > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^SLEEFSHARP/#/g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 && /usr/bin/sed -e s/SLEEFXXX//g /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 && /usr/bin/sed -e "s/^[[:space:]]*\$//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 && /usr/bin/sed "/^\$/N;/^\\n\$/D" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 && /home/runner/work/sleef/sleef/_build-native/bin/addSuffix /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 /home/runner/work/sleef/sleef/src/common/keywords.txt _vsx3_sleef Sleef_rempitabdp Sleef_rempitabsp > /home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h - # In file included from /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c:178: - # /home/runner/work/sleef/sleef/src/arch/helperpower_128.h:9:2: error: Please specify -mcpu=power8 or -mcpu=power9 - # #error Please specify -mcpu=power8 or -mcpu=power9 - # ``` - - arch: ppc64el - compiler: llvm - # It fails with the following error: - # ``` - # FAILED: src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o - # /usr/bin/clang --target=s390x-linux-gnu -DENABLE_ALIAS=1 -DENABLE_PUREC_SCALAR=1 -DENABLE_SYS_getrandom=1 -DMACRO_ONLY_HEADER=\"macroonlyPUREC_SCALAR.h\" 
-DSIMD_SUFFIX=_purec_scalar_sleef -DSLEEF_STATIC_LIBS=1 -DUSE_INLINE_HEADER=\"sleefinline_purec_scalar.h\" -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-s390x/include -I/home/runner/work/sleef/sleef/src/libm -I/home/runner/work/sleef/sleef/_build-s390x/src/libm/include -Wall -Wno-unused-function -Wno-attributes -Wno-unused-result -ffp-contract=off -fno-math-errno -fno-trapping-math -fno-strict-aliasing -O3 -DNDEBUG -std=gnu99 -MD -MT src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -MF src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o.d -o src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -c /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c - # In file included from /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c:65: - # /usr/lib/llvm-13/lib/clang/13.0.1/include/vecintrin.h:11125:2: error: "Use -fzvector to enable vector extensions" - # #error "Use -fzvector to enable vector extensions" - # ``` - - arch: s390x - compiler: llvm # Only GCC trunk supports the RISC-V V intrinsics and https://github.com/riscv-collab/riscv-gnu-toolchain # doesn't track a recent enough version yet - arch: riscv64 @@ -200,12 +180,8 @@ jobs: EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VSX=ON -DENFORCE_VSX3=ON" elif [[ ${{ matrix.arch }} = "s390x" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON" - if [[ ${{ matrix.compiler }} = "gcc" ]]; then - # Disable VXE2 support, QEMU doesn't support some instructions generated by gcc - EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" - else - EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE2=ON" - fi + # Disable VXE2 support, QEMU doesn't support some instructions generated by gcc or llvm + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" elif [[ ${{ matrix.arch }} = "riscv64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON" # Disable inline headers, they just don't compile on riscv64 @@ -281,16 +257,16 @@ jobs: compiler: gcc binfmt: ppc64le qemu_cpu: "power10" - # - arch: ppc64el - # compiler: llvm - # binfmt: ppc64le - # qemu_cpu: "power10" + - arch: ppc64el + compiler: llvm + binfmt: ppc64le + qemu_cpu: "power10" # IBM Z # TODO: figure out qemu_cpu variable to make tests pass on QEMU - arch: s390x compiler: gcc - # - arch: s390x - # compiler: llvm + - arch: s390x + compiler: llvm # RISC-V # - arch: riscv64 # compiler: gcc diff --git a/Configure.cmake b/Configure.cmake index 7ee52250..1c4cb6fe 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -118,6 +118,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.") diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index 352383b1..73818bb0 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -489,12 +489,17 @@ if(BUILD_INLINE_HEADERS) if(COMPILER_SUPPORTS_${SIMD}) string(TOLOWER ${SIMD} SIMDLC) + if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET) + set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET}) + endif() + set(INLINE_HEADER_FILE 
${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h) add_custom_command( OUTPUT ${INLINE_HEADER_FILE} # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}} # -msse2 ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME @@ -510,6 +515,7 @@ if(BUILD_INLINE_HEADERS) # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. Include macroonly*.h instead of helper*.h. COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}} # -msse2 ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME diff --git a/src/quad/CMakeLists.txt b/src/quad/CMakeLists.txt index f37de951..5eae42da 100644 --- a/src/quad/CMakeLists.txt +++ b/src/quad/CMakeLists.txt @@ -234,12 +234,17 @@ if(BUILD_INLINE_HEADERS) if(COMPILER_SUPPORTS_${SIMD}) string(TOLOWER ${SIMD} SIMDLC) + if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET) + set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET}) + endif() + set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefquadinline_${SIMDLC}.h) add_custom_command( OUTPUT ${INLINE_HEADER_FILE} # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}} # -msse2 ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/quad/include ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME From 3bcec26039771e8af513ebb184b80292345a633d Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Mon, 13 Nov 2023 22:07:58 +0000 Subject: [PATCH 08/24] Nit: Reorder build-cross targets to group gcc/llvm per-arch together --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 52fa8185..e5bb74ec 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -116,8 +116,8 @@ jobs: strategy: fail-fast: false matrix: - compiler: [gcc, llvm] arch: [aarch64, armhf, ppc64el, s390x, riscv64] + compiler: [gcc, llvm] include: - arch: armhf gnupkg: -arm-linux-gnueabihf From 24278a0788b2405ddac8b2ed354cd82020692bdf Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 16:57:14 +0000 Subject: [PATCH 09/24] Remove unused riscv-gnu-toolchain installation step --- .github/workflows/build_and_test.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e5bb74ec..c04b4c0e 100644 --- 
a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -140,14 +140,6 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} - - name: Install gcc - run: | - RISCV_GNU_TOOLCHAIN_TAG="2023.11.08" - curl -L https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-gcc-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | - tar xzf - -C /opt - echo "PATH=/opt/riscv/bin:$PATH" >> $GITHUB_ENV - if: ${{ matrix.compiler == 'gcc' && matrix.arch == 'riscv64' }} - - name: Install llvm run: | LLVM_VERSION="17" From 35be378d3d99fde39f586a620919fc1184c27f5b Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 18:11:23 +0000 Subject: [PATCH 10/24] Use same compiler versions across all builds --- .github/workflows/build_and_test.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c04b4c0e..5699c460 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -15,6 +15,8 @@ permissions: contents: read env: + GCC_VERSION: "12" + LLVM_VERSION: "17" COMMON_CMAKE_FLAGS: | -DSLEEF_SHOW_CONFIG=1 -DDISABLE_SSL=ON @@ -44,6 +46,20 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev + # Needed for llvm builds as well for target libraries + - name: Install gcc + run: | + sudo apt-get install -y -qq gcc-${GCC_VERSION} + + - name: Install llvm + run: | + curl -o llvm.sh https://apt.llvm.org/llvm.sh + chmod u+x llvm.sh + sudo ./llvm.sh ${LLVM_VERSION} + sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang + rm llvm.sh + if: ${{ matrix.compiler == 'llvm' }} + - name: Build native shell: bash -ex -o pipefail {0} run: | @@ -138,11 +154,15 @@ jobs: - name: Install dependencies run: | sudo apt-get update -y -qq - sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} + sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev + + # Needed for llvm builds as well for target libraries + - name: Install gcc + run: | + sudo apt-get install -y -qq gcc-${GCC_VERSION}${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} - name: Install llvm run: | - LLVM_VERSION="17" curl -o llvm.sh https://apt.llvm.org/llvm.sh chmod u+x llvm.sh sudo ./llvm.sh ${LLVM_VERSION} From abd492ffac8c329f1217bd94fcd0cf63e0cb5adc Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 18:26:42 +0000 Subject: [PATCH 11/24] fixup! 
Use same compiler versions across all builds --- travis/toolchain-aarch64-gcc.cmake | 2 +- travis/toolchain-armhf-gcc.cmake | 2 +- travis/toolchain-ppc64el-gcc.cmake | 2 +- travis/toolchain-riscv64-gcc.cmake | 2 +- travis/toolchain-s390x-gcc.cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake index c73de216..7f82bf9a 100644 --- a/travis/toolchain-aarch64-gcc.cmake +++ b/travis/toolchain-aarch64-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) -find_program(CMAKE_C_COMPILER aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5) +find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake index ba233487..af3e1aed 100644 --- a/travis/toolchain-armhf-gcc.cmake +++ b/travis/toolchain-armhf-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) -find_program(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5) +find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-ppc64el-gcc.cmake b/travis/toolchain-ppc64el-gcc.cmake index e26a6eaa..19a5a3be 100644 --- a/travis/toolchain-ppc64el-gcc.cmake +++ b/travis/toolchain-ppc64el-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) -find_program(CMAKE_C_COMPILER powerpc64le-linux-gnu-gcc ppc64el-cc) +find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) diff --git a/travis/toolchain-riscv64-gcc.cmake b/travis/toolchain-riscv64-gcc.cmake index fb7dc230..d23c56d3 100644 --- a/travis/toolchain-riscv64-gcc.cmake +++ b/travis/toolchain-riscv64-gcc.cmake @@ -2,7 +2,7 @@ set(CMAKE_CROSSCOMPILING TRUE) set(CMAKE_SYSTEM_NAME "Linux") set(CMAKE_SYSTEM_PROCESSOR "riscv64") -find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES riscv64-unknown-linux-gnu-gcc-12 riscv64-unknown-linux-gnu-gcc-11 riscv64-unknown-linux-gnu-gcc) set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-s390x-gcc.cmake b/travis/toolchain-s390x-gcc.cmake index a2d37bda..f7ba0cb0 100644 --- a/travis/toolchain-s390x-gcc.cmake +++ b/travis/toolchain-s390x-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) 
-find_program(CMAKE_C_COMPILER s390x-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES s390x-unknown-linux-gnu-gcc-12 s390x-unknown-linux-gnu-gcc-11 s390x-unknown-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) From b1176459d74c9740971eb18da9c3f1e6a41e9045 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 23:16:51 +0000 Subject: [PATCH 12/24] fixup! Use same compiler versions across all builds --- travis/toolchain-aarch64-gcc.cmake | 2 +- travis/toolchain-armhf-gcc.cmake | 2 +- travis/toolchain-ppc64el-gcc.cmake | 2 +- travis/toolchain-riscv64-gcc.cmake | 16 +++++++++------- travis/toolchain-s390x-gcc.cmake | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake index 7f82bf9a..710622ea 100644 --- a/travis/toolchain-aarch64-gcc.cmake +++ b/travis/toolchain-aarch64-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake index af3e1aed..86576140 100644 --- a/travis/toolchain-armhf-gcc.cmake +++ b/travis/toolchain-armhf-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) -find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) +find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-ppc64el-gcc.cmake b/travis/toolchain-ppc64el-gcc.cmake index 19a5a3be..bec0de6f 100644 --- a/travis/toolchain-ppc64el-gcc.cmake +++ b/travis/toolchain-ppc64el-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc) +find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) diff --git a/travis/toolchain-riscv64-gcc.cmake b/travis/toolchain-riscv64-gcc.cmake index d23c56d3..b0840998 100644 --- a/travis/toolchain-riscv64-gcc.cmake +++ b/travis/toolchain-riscv64-gcc.cmake @@ -1,9 +1,11 @@ -set(CMAKE_CROSSCOMPILING TRUE) -set(CMAKE_SYSTEM_NAME "Linux") -set(CMAKE_SYSTEM_PROCESSOR "riscv64") +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "riscv64") 
-find_program(CMAKE_C_COMPILER NAMES riscv64-unknown-linux-gnu-gcc-12 riscv64-unknown-linux-gnu-gcc-11 riscv64-unknown-linux-gnu-gcc) +SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu) -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +find_program(CMAKE_C_COMPILER NAMES riscv64-linux-gnu-gcc-14 riscv64-linux-gnu-gcc) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-s390x-gcc.cmake b/travis/toolchain-s390x-gcc.cmake index f7ba0cb0..8f0ed581 100644 --- a/travis/toolchain-s390x-gcc.cmake +++ b/travis/toolchain-s390x-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES s390x-unknown-linux-gnu-gcc-12 s390x-unknown-linux-gnu-gcc-11 s390x-unknown-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-12 s390x-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) From 1c097ca5ba889de2c58dcbc9f9d2ded52d3c025a Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Wed, 15 Nov 2023 00:02:13 +0000 Subject: [PATCH 13/24] Downgrade version to GCC 11 It fails on aarch64, ppc64el, and s390x https://github.com/shibatch/sleef/actions/runs/6870688901\?pr\=477 --- .github/workflows/build_and_test.yml | 2 +- travis/toolchain-aarch64-gcc.cmake | 2 +- travis/toolchain-armhf-gcc.cmake | 2 +- travis/toolchain-ppc64el-gcc.cmake | 2 +- travis/toolchain-s390x-gcc.cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5699c460..e5e518d8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -15,7 +15,7 @@ permissions: contents: read env: - GCC_VERSION: "12" + GCC_VERSION: "11" LLVM_VERSION: "17" COMMON_CMAKE_FLAGS: | -DSLEEF_SHOW_CONFIG=1 diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake index 710622ea..c3594551 100644 --- a/travis/toolchain-aarch64-gcc.cmake +++ b/travis/toolchain-aarch64-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake index 86576140..24e160b9 100644 --- a/travis/toolchain-armhf-gcc.cmake +++ b/travis/toolchain-armhf-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) -find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 
arm-linux-gnueabihf-gcc) +find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-ppc64el-gcc.cmake b/travis/toolchain-ppc64el-gcc.cmake index bec0de6f..7d6c96ae 100644 --- a/travis/toolchain-ppc64el-gcc.cmake +++ b/travis/toolchain-ppc64el-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc ppc64el-cc) +find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) diff --git a/travis/toolchain-s390x-gcc.cmake b/travis/toolchain-s390x-gcc.cmake index 8f0ed581..4aa9f12c 100644 --- a/travis/toolchain-s390x-gcc.cmake +++ b/travis/toolchain-s390x-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-12 s390x-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-11 s390x-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) From 395a6f64bdedec5635afebe6cccee230c2290ce3 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Wed, 15 Nov 2023 14:47:06 +0000 Subject: [PATCH 14/24] Add rvvm*nofma configuration --- CMakeLists.txt | 4 +-- Configure.cmake | 10 ++++++++ src/arch/helperrvv.h | 44 ++++++++++++++++++++++----------- src/common/commonfuncs.h | 6 ++--- src/common/dd.h | 2 +- src/common/df.h | 2 +- src/libm-tester/iutsimd.c | 16 ++++++++++-- src/libm-tester/tester2simddp.c | 18 +++++++++++++- src/libm-tester/tester2simdsp.c | 18 +++++++++++++- src/libm/CMakeLists.txt | 6 +++++ src/libm/sleefsimddp.c | 26 +++++++++++++++++++ src/libm/sleefsimdsp.c | 34 ++++++++++++++++++++++--- 12 files changed, 156 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40dca676..298a2fee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ set(SLEEF_ALL_SUPPORTED_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z - RVVM1 RVVM2 # RISC-V Vectors + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -57,7 +57,7 @@ set(SLEEF_SUPPORTED_LIBM_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z - RVVM1 RVVM2 # RISC-V Vectors + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." 
) diff --git a/Configure.cmake b/Configure.cmake index 1c4cb6fe..ac64f16f 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -168,7 +168,9 @@ set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector") # RISC-V set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM1NOFMA "-march=rv64gcv_zba_zbb_zbs") set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2NOFMA "-march=rv64gcv_zba_zbb_zbs") set(FLAGS_OTHERS "") @@ -634,6 +636,10 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1) int main() { vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_v_min_vlen / 32); }" COMPILER_SUPPORTS_RVVM1) + + if(COMPILER_SUPPORTS_RVVM1) + set(COMPILER_SUPPORTS_RVVM1NOFMA 1) + endif() endif() if (ENFORCE_RVVM1 AND NOT COMPILER_SUPPORTS_RVVM1) @@ -652,6 +658,10 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2) int main() { vint32m2_t r = __riscv_vmv_v_x_i32m2(1, __riscv_v_min_vlen / 32); }" COMPILER_SUPPORTS_RVVM2) + + if(COMPILER_SUPPORTS_RVVM2) + set(COMPILER_SUPPORTS_RVVM2NOFMA 1) + endif() endif() if (ENFORCE_RVVM2 AND NOT COMPILER_SUPPORTS_RVVM2) diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h index 18fe7642..dbfd2adf 100644 --- a/src/arch/helperrvv.h +++ b/src/arch/helperrvv.h @@ -10,7 +10,7 @@ #endif #endif // #if !defined(SLEEF_GENHEADER) -#if CONFIG == 1 +#if CONFIG == 1 || CONFIG == 2 #define ISANAME "RISC-V Vector Extension with Min. VLEN" #define SLEEF_RVV_VLEN __riscv_v_min_vlen #else @@ -45,23 +45,27 @@ static INLINE int vavailability_i(int name) { return -1; } #ifdef ENABLE_RVV_SP // Types that conflict with ENABLE_RVV_DP -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) typedef vuint64m2_t vmask; typedef vbool32_t vopmask; -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) typedef vuint64m4_t vmask; typedef vbool16_t vopmask; +#else +#error "unknown rvv lmul" #endif #endif #ifdef ENABLE_RVV_DP // Types that conflict with ENABLE_RVV_SP -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) typedef vuint64m1_t vmask; typedef vbool64_t vopmask; -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) typedef vuint64m2_t vmask; typedef vbool32_t vopmask; +#else +#error "unknown rvv lmul" #endif #endif @@ -73,7 +77,7 @@ typedef vbool32_t vopmask; // wide-LMUL register group. In the largest cases (ddi_t and ddf_t), this // requires LMUL=8 if the base type (vfloat or vdouble) has LMUL=2, meaning // LMUL=2 is currently the widest option for SLEEF function argument types. 
-#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) typedef vint32mf2_t vint; typedef vfloat64m1_t vdouble; @@ -156,7 +160,7 @@ typedef vint32m4_t dfi_t; #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2 -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) typedef vint32m1_t vint; typedef vfloat64m2_t vdouble; @@ -239,6 +243,8 @@ typedef vint32m8_t dfi_t; #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1 +#else +#error "unknown rvv lmul" #endif // ENABLE_RVVM1 //////////////////////////////////////////////////////////////////////////////// @@ -635,19 +641,23 @@ static INLINE vdouble digetd_vd_di(di_t d) { return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); } static INLINE vint digeti_vi_di(di_t d) { -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) return SLEEF_RVV_DP_VGET_VI(d, 2); +#else +#error "unknown rvv lmul" #endif } static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { di_t res; res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) res = __riscv_vset(res, 1, __riscv_vlmul_ext_i32m1(i)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) res = __riscv_vset(res, 2, i); +#else +#error "unknown rvv lmul" #endif return res; } @@ -656,19 +666,23 @@ static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { return SLEEF_RVV_DP_VGET_VD2(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(d), 0); } static INLINE vint ddigeti_vi_ddi(ddi_t d) { -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) return SLEEF_RVV_DP_VGET_VI(d, 4); +#else +#error "unknown rvv lmul" #endif } static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { ddi_t res; res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) res = __riscv_vset(res, 2, __riscv_vlmul_ext_i32m1(i)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) res = __riscv_vset(res, 4, i); +#else +#error "unknown rvv lmul" #endif return res; } diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h index aff782df..274156fc 100644 --- a/src/common/commonfuncs.h +++ b/src/common/commonfuncs.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) typedef struct { vdouble x, y, z; } vdouble3; @@ -210,7 +210,7 @@ static INLINE CONST VECTOR_CC vdouble vtoward0_vd_vd(vdouble x) { // returns nex return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t); } -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { return 
vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } @@ -220,7 +220,7 @@ static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) { return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); } -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } diff --git a/src/common/dd.h b/src/common/dd.h index 89af2e87..3431e42d 100644 --- a/src/common/dd.h +++ b/src/common/dd.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) #if !defined(ENABLE_CUDA) typedef struct { vdouble x, y; diff --git a/src/common/df.h b/src/common/df.h index 0883b227..a14c1c6a 100644 --- a/src/common/df.h +++ b/src/common/df.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) #if !defined(ENABLE_CUDA) typedef struct { vfloat x, y; diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 90353586..03fcd743 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -349,12 +349,24 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #include "renamervvm1.h" #endif +#ifdef ENABLE_RVVM1NOFMA +#define CONFIG 2 +#include "helperrvv.h" +#include "renamervvm1nofma.h" +#endif + #ifdef ENABLE_RVVM2 #define CONFIG 1 #include "helperrvv.h" #include "renamervvm2.h" #endif +#ifdef ENABLE_RVVM2NOFMA +#define CONFIG 2 +#include "helperrvv.h" +#include "renamervvm2nofma.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #include "renamepurec_scalar.h" #if !defined(USE_INLINE_HEADER) @@ -438,12 +450,12 @@ int check_feature(double d, float f) { return 0; } -#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif -#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c index 9d723868..5071bb70 100644 --- a/src/libm-tester/tester2simddp.c +++ 
b/src/libm-tester/tester2simddp.c
@@ -199,6 +199,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_RVVM2
 #define CONFIG 1
 #define ENABLE_RVV_DP
@@ -207,6 +215,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -225,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
 static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c
index d83e8b4b..3fb1e619 100644
--- a/src/libm-tester/tester2simdsp.c
+++ b/src/libm-tester/tester2simdsp.c
@@ -199,6 +199,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_RVVM2
 #define CONFIG 1
 #define ENABLE_RVV_SP
@@ -207,6 +215,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -225,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; }
 static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
 #endif
diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt
index 73818bb0..35b12734 100644
--- a/src/libm/CMakeLists.txt
+++ b/src/libm/CMakeLists.txt
@@ -63,7 +63,9 @@ elseif(SLEEF_ARCH_S390X)
 elseif(SLEEF_ARCH_RISCV64)
   set(SLEEF_HEADER_LIST
     RVVM1
+    RVVM1NOFMA
     RVVM2
+    RVVM2NOFMA
     PUREC_SCALAR
     PURECFMA_SCALAR
   )
@@ -106,7 +108,9 @@ command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE"
 command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma)
 
 command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1)
+command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1nofma)
 command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2)
+command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2nofma)
 
 command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__)
 command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t
__STDC__ purec) @@ -155,7 +159,9 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE) command_arguments(RENAME_PARAMS_RVVM1 finz_ x x m1) +command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x m1nofma) command_arguments(RENAME_PARAMS_RVVM2 finz_ x x m2) +command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x m2nofma) # ALIAS_PARAMS diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index 68c13a34..6cdaa6ad 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -235,6 +235,19 @@ extern const double Sleef_rempitabdp[]; #endif #endif +#ifdef ENABLE_RVVM1NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_DP +#include "helperrvv.h" +#else +#include "macroonlyRVVM1NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm1nofma.h" +#endif +#endif /* ENABLE_RVVM1NOFMA */ + #ifdef ENABLE_RVVM2 #define CONFIG 1 #if !defined(SLEEF_GENHEADER) @@ -248,6 +261,19 @@ extern const double Sleef_rempitabdp[]; #endif #endif +#ifdef ENABLE_RVVM2NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_DP +#include "helperrvv.h" +#else +#include "macroonlyRVVM2NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm2nofma.h" +#endif +#endif /* ENABLE_RVVM2NOFMA */ + // Generic #ifdef ENABLE_VECEXT diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index 5ec5a082..c5dbc2a5 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -335,6 +335,19 @@ extern const float Sleef_rempitabsp[]; #endif #endif +#ifdef ENABLE_RVVM1NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_SP +#include "helperrvv.h" +#else +#include "macroonlyRVVM1NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm1nofma.h" +#endif +#endif + #ifdef ENABLE_RVVM2 #define CONFIG 1 #if !defined(SLEEF_GENHEADER) @@ -348,6 +361,19 @@ extern const float Sleef_rempitabsp[]; #endif #endif +#ifdef ENABLE_RVVM2NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_SP +#include "helperrvv.h" +#else +#include "macroonlyRVVM2NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm2nofma.h" +#endif +#endif + // Generic #ifdef ENABLE_VECEXT @@ -428,7 +454,7 @@ static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) { return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f))); } -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); } @@ -516,7 +542,7 @@ static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) { EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); } -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) typedef struct { vfloat d; vint2 i; @@ -546,7 +572,7 @@ static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { } #endif -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST VECTOR_CC vfloat 
From ce3f65a210a04bdb5cec31cd2f384a4e0078987d Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Wed, 15 Nov 2023 16:24:34 +0000
Subject: [PATCH 15/24] Fix CONFIG for RVV in helperrvv

---
 src/arch/helperrvv.h | 60 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index dbfd2adf..610c99f1 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -13,19 +13,34 @@
 #if CONFIG == 1 || CONFIG == 2
 #define ISANAME "RISC-V Vector Extension with Min. VLEN"
 #define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#elif CONFIG == 8
+// 256-bit vector length
+#define ISANAME "RISC-V Vector Extension 256-bit"
+#define SLEEF_RVV_VLEN (1 << 8)
+#elif CONFIG == 9
+// 512-bit vector length
+#define ISANAME "RISC-V Vector Extension 512-bit"
+#define SLEEF_RVV_VLEN (1 << 9)
+#elif CONFIG == 10
+// 1024-bit vector length
+#define ISANAME "RISC-V Vector Extension 1024-bit"
+#define SLEEF_RVV_VLEN (1 << 0)
+#elif CONFIG == 11
+// 2048-bit vector length
+#define ISANAME "RISC-V Vector Extension 2048-bit"
+#define SLEEF_RVV_VLEN (1 << 1)
 #else
-#define ISANAME "RISC-V Vector Extension VLEN=2^"#CONFIG
-#define SLEEF_RVV_VLEN (1 << CONFIG)
-#endif
-
-#ifndef CONFIG
-#error CONFIG macro not defined
+#error CONFIG macro invalid or not defined
 #endif
 
 #define ENABLE_SP
-#define ENABLE_FMA_DP
 #define ENABLE_DP
 
+#if CONFIG != 2
+#define ENABLE_FMA_SP
+#define ENABLE_FMA_DP
+#endif
+
 static INLINE int vavailability_i(int name) { return -1; }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -405,13 +420,33 @@ static INLINE vfloat vrec_vf_vf(vfloat d) {
 static INLINE vfloat vsqrt_vf_vf(vfloat d) {
   return __riscv_vfsqrt(d, VECTLENSP);
 }
-// fused multiply-add/subtract
+#if defined(ENABLE_FMA_SP)
+// Multiply accumulate: z = z + x * y
 static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
   return __riscv_vfmadd(x, y, z, VECTLENSP);
 }
+// Multiply subtract: z = z - x * y
 static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
   return __riscv_vfnmsub(x, y, z, VECTLENSP);
 }
+static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
+  return __riscv_vfmsub(x, y, z, VECTLENSP);
+}
+#else
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); }
+static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
+#endif
+// fused multiply add / sub
+static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) + z
+  return __riscv_vfmadd(x, y, z, VECTLENSP);
+}
+static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // -(x * y) + z
+  return __riscv_vfnmsub(x, y, z, VECTLENSP);
+}
+static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) - z
+  return __riscv_vfmsub(x, y, z, VECTLENSP);
+}
 // sign manipulation
 static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) {
   return __riscv_vfsgnjx(x, y, VECTLENSP);
@@ -782,13 +817,20 @@ static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) {
 static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) {
   return __riscv_vfmin(x, y, VECTLENDP);
 }
-// fused multiply add / sub
+#if defined(ENABLE_FMA_DP)
+// Multiply accumulate: z = z + x * y
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
   return __riscv_vfmadd(x, y, z, VECTLENDP);
 }
+// Multiply subtract: x * y - z
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
   return __riscv_vfmsub(x, y, z, VECTLENDP);
 }
+#else
+static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+#endif
+// fused multiply add / sub
 static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
   return __riscv_vfmadd(x, y, z, VECTLENDP);
 }
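The ENABLE_FMA_SP/ENABLE_FMA_DP split above is the whole point of the NOFMA variants: vmla and friends contract to a single fused instruction when FMA is allowed, and fall back to a separate multiply and add (two roundings) otherwise, so the two builds can disagree in the last bit. A minimal scalar illustration of that difference (plain C with math.h, not SLEEF code):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      double x = 1.0 / 3.0;               /* already rounded, so x*3 != 1 exactly */
      double fused   = fma(x, 3.0, -1.0); /* one rounding: keeps the residual */
      double unfused = x * 3.0 - 1.0;     /* two roundings: residual is lost */
      printf("fused = %g, unfused = %g\n", fused, unfused);
      /* typically prints: fused = -5.55112e-17, unfused = 0 */
      return 0;
    }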
From 549d2120448e1ed173b98b6fe8831a81d44051b4 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Wed, 15 Nov 2023 20:17:17 +0000
Subject: [PATCH 16/24] fixup! Fix CONFIG for RVV in helperrvv

---
 src/arch/helperrvv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 610c99f1..8f2eb204 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -24,11 +24,11 @@
 #elif CONFIG == 10
 // 1024-bit vector length
 #define ISANAME "RISC-V Vector Extension 1024-bit"
-#define SLEEF_RVV_VLEN (1 << 0)
+#define SLEEF_RVV_VLEN (1 << 10)
 #elif CONFIG == 11
 // 2048-bit vector length
 #define ISANAME "RISC-V Vector Extension 2048-bit"
-#define SLEEF_RVV_VLEN (1 << 1)
+#define SLEEF_RVV_VLEN (1 << 11)
 #else
 #error CONFIG macro invalid or not defined
 #endif

From e1b59d388395c36a5fa3324fd40973370c47fa78 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Wed, 15 Nov 2023 20:19:26 +0000
Subject: [PATCH 17/24] fixup! Fix CONFIG for RVV in helperrvv

---
 src/arch/helperrvv.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 8f2eb204..3090121c 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -13,6 +13,10 @@
 #if CONFIG == 1 || CONFIG == 2
 #define ISANAME "RISC-V Vector Extension with Min. VLEN"
 #define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#elif CONFIG == 7
+// 128-bit vector length
+#define ISANAME "RISC-V Vector Extension 128-bit"
+#define SLEEF_RVV_VLEN (1 << 7)
 #elif CONFIG == 8
 // 256-bit vector length
 #define ISANAME "RISC-V Vector Extension 256-bit"
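These two fixups repair PATCH 15, whose CONFIG 10 and 11 branches accidentally expanded to one- and two-bit "vectors" ((1 << 0) and (1 << 1)), and add the missing 128-bit case; the intended invariant is that CONFIG n, for n >= 7, encodes a fixed VLEN of 2^n bits. A compile-time guard that would have caught the typos, as a sketch (standard C11, not part of the patch; valid as of this point in the series, since PATCH 22 later redefines the macro in bytes):

    /* Sketch: for fixed-VLEN builds, SLEEF_RVV_VLEN must be 2^CONFIG bits. */
    #if CONFIG >= 7 && CONFIG <= 11
    _Static_assert(SLEEF_RVV_VLEN == (1 << CONFIG),
                   "SLEEF_RVV_VLEN must equal 2^CONFIG bits");
    #endif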
From 105ff56119d455958c8f48aabba130b4b62fef15 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Thu, 16 Nov 2023 13:32:31 +0000
Subject: [PATCH 18/24] Add tester3 for RISC-V

---
 .github/workflows/build_and_test.yml |  3 ++-
 CMakeLists.txt                       |  4 ++++
 src/libm-tester/CMakeLists.txt       |  8 ++++++
 src/libm-tester/tester3.c            | 40 +++++++++++++++++++++++++++-
 src/libm/CMakeLists.txt              | 16 +++++------
 5 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index e5e518d8..1953cbc5 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -19,13 +19,14 @@ env:
   LLVM_VERSION: "17"
   COMMON_CMAKE_FLAGS: |
     -DSLEEF_SHOW_CONFIG=1
-    -DDISABLE_SSL=ON
     -DBUILD_GNUABI_LIBS=ON
     -DBUILD_INLINE_HEADERS=ON
     -DBUILD_DFT=ON
     -DBUILD_QUAD=ON
     -DBUILD_SCALAR_LIB=ON
     -DBUILD_STATIC_TEST_BINS=ON
+    -DENFORCE_TESTER=ON
+    -DENFORCE_TESTER3=ON
 
 jobs:
   build-native:

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 298a2fee..6bd3ab90 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,6 +87,10 @@ set(COSTOVERRIDE_NEON32 2)
 set(COSTOVERRIDE_NEON32VFPV4 2)
 set(COSTOVERRIDE_SVE 10)
 set(COSTOVERRIDE_SVENOFMA 10)
+set(COSTOVERRIDE_RVVM1 10)
+set(COSTOVERRIDE_RVVM1NOFMA 10)
+set(COSTOVERRIDE_RVVM2 10)
+set(COSTOVERRIDE_RVVM2NOFMA 10)
 
 #

diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt
index 69f2ba6e..41d6f36f 100644
--- a/src/libm-tester/CMakeLists.txt
+++ b/src/libm-tester/CMakeLists.txt
@@ -27,6 +27,11 @@ set(TESTER3_DEFINITIONS_VXENOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SL
 set(TESTER3_DEFINITIONS_VXE2 ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2)
 set(TESTER3_DEFINITIONS_VXE2NOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2nofma)
 
+set(TESTER3_DEFINITIONS_RVVM1 ATR=finz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1 ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM1NOFMA ATR=cinz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1nofma ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM2 ATR=finz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2 ENABLE_RVVM2)
+set(TESTER3_DEFINITIONS_RVVM2NOFMA ATR=cinz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2nofma ENABLE_RVVM2)
+
 set(TESTER3_DEFINITIONS_PUREC_SCALAR ATR=cinz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purec)
 set(TESTER3_DEFINITIONS_PURECFMA_SCALAR ATR=finz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purecfma)
 
@@ -47,6 +52,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(TEST3_CINZ purec_scalar vxenofma vxe2nofma)
   set(TEST3_FINZ purecfma_scalar vxe vxe2)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+  set(TEST3_CINZ purec_scalar rvvm1nofma rvvm2nofma)
+  set(TEST3_FINZ purecfma_scalar rvvm1 rvvm2)
 endif()
 
 #

diff --git a/src/libm-tester/tester3.c b/src/libm-tester/tester3.c
index f4b27e56..3027dff3 100644
--- a/src/libm-tester/tester3.c
+++ b/src/libm-tester/tester3.c
@@ -101,6 +101,44 @@ static INLINE __attribute__((vector_size(16))) float setSLEEF_VECTOR_FLOAT(float
 static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float v, int r) { return unifyValuef(v[r & 3]); }
 #endif
 
+#if __riscv && __riscv_v
+
+#if defined(ENABLE_RVVM1)
+#define VECTLENSP (1 * __riscv_v_min_vlen / 32)
+#define VECTLENDP (1 * __riscv_v_min_vlen / 64)
+
+static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
+static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m1_t setvfloat64m1_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m1(a, VECTLENDP); }
+static INLINE double getvfloat64m1_t(vfloat64m1_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m1_t vf2getx_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 0); }
+static vfloat32m1_t vf2gety_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 1); }
+static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 0); }
+static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
+
+#elif defined(ENABLE_RVVM2)
+#define VECTLENSP (2 * __riscv_v_min_vlen / 32)
+#define VECTLENDP (2 * __riscv_v_min_vlen / 64)
+
+static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
+static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m2_t setvfloat64m2_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m2(a, VECTLENDP); }
+static INLINE double getvfloat64m2_t(vfloat64m2_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m2_t vf2getx_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 0); }
+static vfloat32m2_t vf2gety_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 1); }
+static vfloat64m2_t vd2getx_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 0); }
+static vfloat64m2_t vd2gety_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 1); }
+
+#else
+#error "unknown RVV"
+#endif
+
+#undef VECTLENSP
+#undef VECTLENDP
+#endif
+
 //
 // ATR = cinz_, NAME = sin, TYPE = d2, ULP = u35, EXT = sse2
@@ -110,7 +148,7 @@ static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float
 #define SET(TYPE) set ## TYPE
 #define GET(TYPE) get ## TYPE
 
-#ifndef __ARM_FEATURE_SVE
+#if !defined(__ARM_FEATURE_SVE) && !(defined(__riscv) && defined(__riscv_v))
 static DPTYPE vd2getx_vd_vd2(TYPE2(DPTYPE) v) { return v.x; }
 static DPTYPE vd2gety_vd_vd2(TYPE2(DPTYPE) v) { return v.y; }
 static SPTYPE vf2getx_vf_vf2(TYPE2(SPTYPE) v) { return v.x; }

diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt
index 35b12734..a0e4029d 100644
--- a/src/libm/CMakeLists.txt
+++ b/src/libm/CMakeLists.txt
@@ -107,10 +107,10 @@ command_arguments(HEADER_PARAMS_VXENOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE"
 command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2)
 command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma)
 
-command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1)
-command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1nofma)
-command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2)
-command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2nofma)
+command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1)
+command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1nofma)
+command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2)
+command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2nofma)
 
 command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__)
 command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec)
@@ -158,10 +158,10 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3
 # the "x" token of VLA SVE vector functions.
 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)
 
-command_arguments(RENAME_PARAMS_RVVM1 finz_ x x m1)
-command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x m1nofma)
-command_arguments(RENAME_PARAMS_RVVM2 finz_ x x m2)
-command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x m2nofma)
+command_arguments(RENAME_PARAMS_RVVM1 finz_ x x rvvm1)
+command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x rvvm1nofma)
+command_arguments(RENAME_PARAMS_RVVM2 finz_ x x rvvm2)
+command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x rvvm2nofma)
 
 # ALIAS_PARAMS
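In tester3, each vector extension only has to provide a pair of lane accessors: setTYPE(d, r) plants a known scalar into lane r of an otherwise randomized vector, and getTYPE(v, r) reads that lane back after the function under test has run, so the shared driver never touches vector types directly. A scalar sketch of the same round-trip (plain C, with an array standing in for the RVV register; assumes the lane count is a power of two, as it is above):

    #include <string.h>

    #define VECTLEN 4              /* stand-in for the vlen-derived lane count */
    static float vecbuf[VECTLEN];  /* stand-in for a vfloat32m1_t */

    static void set_lane(float d, int r) {
      memset(vecbuf, 0, sizeof(vecbuf)); /* the real code randomizes via memrand() */
      vecbuf[r & (VECTLEN - 1)] = d;     /* mask is valid because VECTLEN is 2^n */
    }

    static float get_lane(int r) {
      return vecbuf[r & (VECTLEN - 1)];
    }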
From 562a69ced35057a413e84df66065e829f762d5a2 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Thu, 16 Nov 2023 13:50:13 +0000
Subject: [PATCH 19/24] Add sysroot for dependencies

---
 .github/workflows/build_and_test.yml | 34 ++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 1953cbc5..d11cc549 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -137,9 +137,13 @@ jobs:
         compiler: [gcc, llvm]
         include:
         - arch: armhf
+          binfmt: arm
          gnupkg: -arm-linux-gnueabihf
         - arch: ppc64el
+          binfmt: ppc64le
           gnupkg: -powerpc64le-linux-gnu
+        - arch: aarch64
+          debarch: arm64
         exclude:
         # Only GCC trunk supports the RISC-V V intrinsics and https://github.com/riscv-collab/riscv-gnu-toolchain
         # doesn't track a recent enough version yet
@@ -155,7 +159,7 @@
     - name: Install dependencies
       run: |
        sudo apt-get update -y -qq
-        sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev
+        sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev debootstrap
 
     # Needed for llvm builds as well for target libraries
     - name: Install gcc
@@ -171,6 +175,30 @@
        rm llvm.sh
       if: ${{ matrix.compiler == 'llvm' }}
 
+    - name: Setup QEMU
+      uses: docker/setup-qemu-action@v3.0.0
+      with:
+        platforms: ${{ matrix.binfmt || matrix.arch }}
+
+    - name: Check sysroot cache
+      id: check-sysroot-cache
+      uses: actions/cache@v3
+      with:
+        path: sysroot
+        key: sysroot-${{ matrix.arch }}-${{ hashFiles('./.github/workflows/build_and_test.yml') }}
+
+    - name: Create sysroot
+      run: |
+        sudo debootstrap --arch=${{ matrix.debarch || matrix.arch }} --verbose --include=fakeroot,symlinks,libmpfr-dev,libssl-dev --resolve-deps --variant=minbase --components=main,universe focal sysroot
+        # Remove unused files to minimize cache
+        sudo chroot sysroot symlinks -cr .
+        sudo chown ${USER} -R sysroot
+        rm -rf sysroot/{dev,proc,run,sys,var}
+        rm -rf sysroot/usr/{sbin,bin,share}
+        rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd}
+        rm -rf sysroot/usr/libexec/gcc
+      if: steps.check-sysroot-cache.outputs.cache-hit != 'true'
+
     - name: Download build-native-${{ matrix.compiler }} artifacts
       uses: actions/download-artifact@v3
       with:
@@ -208,6 +236,7 @@
         cmake -S . -B _build-${{ matrix.arch }} -GNinja \
           -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \
           -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}-${{ matrix.compiler }}.cmake \
+          -DCMAKE_SYSROOT=$(pwd)/sysroot \
           -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \
           ${COMMON_CMAKE_FLAGS} \
           ${EXTRA_CMAKE_FLAGS}
@@ -312,7 +341,8 @@
       with:
         persist-credentials: false
 
-    - uses: docker/setup-qemu-action@v3.0.0
+    - name: Setup QEMU
+      uses: docker/setup-qemu-action@v3.0.0
       with:
         platforms: ${{ matrix.binfmt || matrix.arch }}
From 7de9fd489c16d35607b5fccc18d5bc6003fd1cb4 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Fri, 17 Nov 2023 14:09:37 +0000
Subject: [PATCH 20/24] Fix rounding functions

---
 src/arch/helperrvv.h | 41 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 3090121c..fff0b3fa 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -140,6 +140,9 @@ typedef vint32m4_t dfi_t;
 #define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2
 #define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1
 #define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m1
+#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m1_rm
+#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m1
+#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m1_rm
 #define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m1
 #define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP)
 #define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32mf2
@@ -178,6 +181,9 @@ typedef vint32m4_t dfi_t;
 #define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1
 #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1
 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2
+#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32mf2_rm
+#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m1
+#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m1_rm
 
 #elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)
 
@@ -223,6 +229,9 @@ typedef vint32m8_t dfi_t;
 #define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m4
 #define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2
 #define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m2
+#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m2_rm
+#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m2
+#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m2_rm
 #define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m2
 #define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP)
 #define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32m1
@@ -261,6 +270,9 @@ typedef vint32m8_t dfi_t;
 #define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1
 #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2
 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1
+#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32m1_rm
+#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m2
+#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m2_rm
 
 #else
 #error "unknown rvv lmul"
@@ -344,13 +356,7 @@ static INLINE vfloat vcast_vf_f(float f) {
   return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP);
 }
 static INLINE vfloat vrint_vf_vf(vfloat vd) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP);
-  half = __riscv_vfsgnj(half, vd, VECTLENSP);
-  vfloat res = __riscv_vfadd(vd, half, VECTLENSP);
-  vint2 i = __riscv_vfcvt_rtz_x(res, VECTLENSP);
-  return __riscv_vfcvt_f(i, VECTLENSP);
+  return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_VXRM_RNU, VECTLENSP), VECTLENSP);
 }
 static INLINE vfloat vcast_vf_vi2(vint2 vi) {
   return __riscv_vfcvt_f(vi, VECTLENSP);
@@ -359,12 +365,7 @@ static INLINE vint2 vcast_vi2_i(int i) {
   return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP);
 }
 static INLINE vint2 vrint_vi2_vf(vfloat vf) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP);
-  half = __riscv_vfsgnj(half, vf, VECTLENSP);
-  vfloat res = __riscv_vfadd(vf, half, VECTLENSP);
-  return __riscv_vfcvt_rtz_x(res, VECTLENSP);
+  return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_VXRM_RNU, VECTLENSP);
 }
 static INLINE vint2 vtruncate_vi2_vf(vfloat vf) {
   return __riscv_vfcvt_rtz_x(vf, VECTLENSP);
@@ -742,20 +743,10 @@ static INLINE vint vcast_vi_i(int32_t i) {
   return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP);
 }
 static INLINE vint vrint_vi_vd(vdouble vd) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP);
-  half = __riscv_vfsgnj(half, vd, VECTLENDP);
-  vdouble res = __riscv_vfadd(vd, half, VECTLENDP);
-  return __riscv_vfncvt_rtz_x(res, VECTLENDP);
+  return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_VXRM_RNU, VECTLENDP);
 }
 static INLINE vdouble vrint_vd_vd(vdouble vd) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP);
-  half = __riscv_vfsgnj(half, vd, VECTLENDP);
-  vdouble res = __riscv_vfadd(vd, half, VECTLENDP);
-  return __riscv_vfwcvt_f(__riscv_vfncvt_rtz_x(res, VECTLENDP), VECTLENDP);
+  return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_VXRM_RNU, VECTLENDP), VECTLENDP);
 }
 static INLINE vint vtruncate_vi_vd(vdouble vd) {
   return __riscv_vfncvt_rtz_x(vd, VECTLENDP);
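The rewrite above replaces the add-half-and-truncate emulation with conversions that carry an explicit rounding-mode operand. Besides being shorter, the two approaches genuinely differ: the emulation rounds halfway cases away from zero, while a round-to-nearest conversion under the default mode rounds them to even, and the x + 0.5 addition itself can round and push a value just below .5 over the boundary. A scalar illustration (plain C, not SLEEF code):

    #include <math.h>
    #include <stdio.h>

    /* The removed emulation, scalarized: copysign is the vfsgnj step,
     * trunc is the vfcvt_rtz step. */
    static double emulate_round(double x) {
      return trunc(x + copysign(0.5, x));
    }

    int main(void) {
      /* halfway case: away-from-zero vs round-to-nearest-even */
      printf("%g %g\n", emulate_round(2.5), rint(2.5)); /* 3 vs 2 */
      /* double rounding: x + 0.5 rounds up to 1.0, so trunc gives 1, not 0 */
      double x = nextafter(0.5, 0.0);
      printf("%g %g\n", emulate_round(x), rint(x));     /* 1 vs 0 */
      return 0;
    }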
From 6a3551cd21a1eaa4937aab14bad33b1072c98e51 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Fri, 17 Nov 2023 23:28:59 +0000
Subject: [PATCH 21/24] Use correct rounding mode

---
 src/arch/helperrvv.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index fff0b3fa..10c734e2 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -356,7 +356,7 @@ static INLINE vfloat vcast_vf_f(float f) {
   return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP);
 }
 static INLINE vfloat vrint_vf_vf(vfloat vd) {
-  return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_VXRM_RNU, VECTLENSP), VECTLENSP);
+  return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_FRM_RNE, VECTLENSP), VECTLENSP);
 }
 static INLINE vfloat vcast_vf_vi2(vint2 vi) {
   return __riscv_vfcvt_f(vi, VECTLENSP);
@@ -365,7 +365,7 @@ static INLINE vint2 vcast_vi2_i(int i) {
   return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP);
 }
 static INLINE vint2 vrint_vi2_vf(vfloat vf) {
-  return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_VXRM_RNU, VECTLENSP);
+  return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_FRM_RNE, VECTLENSP);
 }
 static INLINE vint2 vtruncate_vi2_vf(vfloat vf) {
   return __riscv_vfcvt_rtz_x(vf, VECTLENSP);
@@ -743,10 +743,10 @@ static INLINE vint vcast_vi_i(int32_t i) {
   return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP);
 }
 static INLINE vint vrint_vi_vd(vdouble vd) {
-  return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_VXRM_RNU, VECTLENDP);
+  return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_FRM_RNE, VECTLENDP);
 }
 static INLINE vdouble vrint_vd_vd(vdouble vd) {
-  return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_VXRM_RNU, VECTLENDP), VECTLENDP);
+  return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_FRM_RNE, VECTLENDP), VECTLENDP);
 }
 static INLINE vint vtruncate_vi_vd(vdouble vd) {
   return __riscv_vfncvt_rtz_x(vd, VECTLENDP);
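The one-token change above is easy to miss but important: the __RISCV_VXRM_* values select rounding for the fixed-point unit (the vxrm CSR), and RNU, round-to-nearest-up, does not exist as a floating-point mode at all, whereas the _rm floating-point conversions expect an __RISCV_FRM_* value for the frm CSR, where RNE is IEEE round-to-nearest, ties-to-even. A minimal sketch of the corrected usage (assumes a toolchain providing the RVV rounding-mode intrinsics used by the patch):

    #include <riscv_vector.h>

    /* __RISCV_FRM_RNE = IEEE round-to-nearest-even, the mode vrint needs. */
    vint32m1_t round_nearest_even(vfloat32m1_t v, size_t vl) {
      return __riscv_vfcvt_x_f_v_i32m1_rm(v, __RISCV_FRM_RNE, vl);
    }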
From ca8e731c30fb23dc644575858f675d82c9495560 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Sat, 18 Nov 2023 22:23:01 +0100
Subject: [PATCH 22/24] Use __riscv_vlenb

---
 CMakeLists.txt            |  5 +++--
 src/arch/helperrvv.h      | 20 ++++++++++----------
 src/libm-tester/tester3.c |  8 ++++----
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6bd3ab90..c89fa6c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,8 +89,9 @@ set(COSTOVERRIDE_SVE 10)
 set(COSTOVERRIDE_SVENOFMA 10)
 set(COSTOVERRIDE_RVVM1 10)
 set(COSTOVERRIDE_RVVM1NOFMA 10)
-set(COSTOVERRIDE_RVVM2 10)
-set(COSTOVERRIDE_RVVM2NOFMA 10)
+set(COSTOVERRIDE_RVVM2 20)
+set(COSTOVERRIDE_RVVM2NOFMA 20
+)
 
 #

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 10c734e2..8450e51a 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -12,27 +12,27 @@
 #if CONFIG == 1 || CONFIG == 2
 #define ISANAME "RISC-V Vector Extension with Min. VLEN"
-#define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#define SLEEF_RVV_VLEN __riscv_vlenb()
 #elif CONFIG == 7
 // 128-bit vector length
 #define ISANAME "RISC-V Vector Extension 128-bit"
-#define SLEEF_RVV_VLEN (1 << 7)
+#define SLEEF_RVV_VLEN ((1 << 7) / 8)
 #elif CONFIG == 8
 // 256-bit vector length
 #define ISANAME "RISC-V Vector Extension 256-bit"
-#define SLEEF_RVV_VLEN (1 << 8)
+#define SLEEF_RVV_VLEN ((1 << 8) / 8)
 #elif CONFIG == 9
 // 512-bit vector length
 #define ISANAME "RISC-V Vector Extension 512-bit"
-#define SLEEF_RVV_VLEN (1 << 9)
+#define SLEEF_RVV_VLEN ((1 << 9) / 8)
 #elif CONFIG == 10
 // 1024-bit vector length
 #define ISANAME "RISC-V Vector Extension 1024-bit"
-#define SLEEF_RVV_VLEN (1 << 10)
+#define SLEEF_RVV_VLEN ((1 << 10) / 8)
 #elif CONFIG == 11
 // 2048-bit vector length
 #define ISANAME "RISC-V Vector Extension 2048-bit"
-#define SLEEF_RVV_VLEN (1 << 11)
+#define SLEEF_RVV_VLEN ((1 << 11) / 8)
 #else
 #error CONFIG macro invalid or not defined
 #endif
@@ -114,8 +114,8 @@ typedef vint32m2_t fi_t;
 typedef vint32m4_t dfi_t;
 #define SLEEF_RVV_SP_LMUL 1
 #define SLEEF_RVV_DP_LMUL 1
-#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32)
-#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64)
+#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float))
+#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double))
 #define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m1
 #define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m1
 #define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m1
@@ -203,8 +203,8 @@ typedef vint32m4_t fi_t;
 typedef vint32m8_t dfi_t;
 #define SLEEF_RVV_SP_LMUL 2
 #define SLEEF_RVV_DP_LMUL 2
-#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32)
-#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64)
+#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float))
+#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double))
 #define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m2
 #define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m2
 #define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m2

diff --git a/src/libm-tester/tester3.c b/src/libm-tester/tester3.c
index 3027dff3..a55404ed 100644
--- a/src/libm-tester/tester3.c
+++ b/src/libm-tester/tester3.c
@@ -104,8 +104,8 @@ static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float
 #if __riscv && __riscv_v
 
 #if defined(ENABLE_RVVM1)
-#define VECTLENSP (1 * __riscv_v_min_vlen / 32)
-#define VECTLENDP (1 * __riscv_v_min_vlen / 64)
+#define VECTLENSP (1 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (1 * __riscv_vlenb() / sizeof(double))
 
 static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
 static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
@@ -118,8 +118,8 @@ static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v
 static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
 
 #elif defined(ENABLE_RVVM2)
-#define VECTLENSP (2 * __riscv_v_min_vlen / 32)
-#define VECTLENDP (2 * __riscv_v_min_vlen / 64)
+#define VECTLENSP (2 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (2 * __riscv_vlenb() / sizeof(double))
 
 static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
 static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
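Switching from __riscv_v_min_vlen to __riscv_vlenb() changes both the unit and the binding time: vlenb is the actual vector register width of the running core in bytes, not the compile-time minimum in bits, which is why the fixed-CONFIG branches gain a / 8 and the element counts now divide by sizeof. A small sketch of the resulting arithmetic (LMUL = 1 case; assumes a toolchain exposing __riscv_vlenb(), as the patch does):

    #include <riscv_vector.h>
    #include <stdio.h>

    int main(void) {
      size_t vlenb = __riscv_vlenb();                   /* bytes, e.g. 16 if VLEN=128 */
      printf("VLEN = %zu bits\n", vlenb * 8);
      printf("m1 float lanes  = %zu\n", vlenb / sizeof(float));  /* 4 when VLEN=128 */
      printf("m1 double lanes = %zu\n", vlenb / sizeof(double)); /* 2 when VLEN=128 */
      return 0;
    }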
From 12a174beaf5ca9b20168ebbb968f9d9c842fd505 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Sat, 18 Nov 2023 23:44:07 +0000
Subject: [PATCH 23/24] fixup! Use __riscv_vlenb

---
 Configure.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Configure.cmake b/Configure.cmake
index ac64f16f..58fe5403 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -634,7 +634,7 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1)
   CHECK_C_SOURCE_COMPILES("
   #include <riscv_vector.h>
   int main() {
-    vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_v_min_vlen / 32); }"
+    vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_vlenb() * 8 / 32); }"
     COMPILER_SUPPORTS_RVVM1)
 
   if(COMPILER_SUPPORTS_RVVM1)
@@ -656,7 +656,7 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2)
   CHECK_C_SOURCE_COMPILES("
   #include <riscv_vector.h>
   int main() {
-    vint32m2_t r = __riscv_vmv_v_x_i32m2(1, __riscv_v_min_vlen / 32); }"
+    vint32m2_t r = __riscv_vmv_v_x_i32m2(1, 2 * __riscv_vlenb() * 8 / 32); }"
     COMPILER_SUPPORTS_RVVM2)
 
   if(COMPILER_SUPPORTS_RVVM2)

From 19ee07e6b371a9dbc84a223c6e05d26bdf0aeac8 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Mon, 20 Nov 2023 10:34:19 +0000
Subject: [PATCH 24/24] Fix indent

---
 CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c89fa6c6..c38219f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,8 +90,7 @@ set(COSTOVERRIDE_SVENOFMA 10)
 set(COSTOVERRIDE_RVVM1 10)
 set(COSTOVERRIDE_RVVM1NOFMA 10)
 set(COSTOVERRIDE_RVVM2 20)
-set(COSTOVERRIDE_RVVM2NOFMA 20
-)
+set(COSTOVERRIDE_RVVM2NOFMA 20)
 
 #
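With the compile probes and cost overrides in place, the RVV targets build and test like any other extension. As a closing illustration, a hypothetical smoke test against the resulting library; the symbol name below follows the HEADER_PARAMS_RVVM1 pattern above (finz_ attribute, dx type spec, rvvm1 suffix), but check the generated sleef.h for the exact spelling before relying on it:

    #include <riscv_vector.h>
    #include <sleef.h>  /* assumed generated/installed header */

    int main(void) {
      size_t vl = __riscv_vlenb() / sizeof(double);      /* m1 double lanes */
      vfloat64m1_t x = __riscv_vfmv_v_f_f64m1(0.5, vl);  /* broadcast 0.5 */
      vfloat64m1_t y = Sleef_finz_sindx_u10rvvm1(x);     /* hypothetical name */
      (void)y;
      return 0;
    }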