From d747f611af755558d5d073d39185224d4b180fea Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Wed, 19 Oct 2022 18:27:04 +0200 Subject: [PATCH 01/24] Extend FMA support to RISC-V --- src/arch/helperpurec_scalar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index d8b9c845..fb83b84c 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -54,7 +54,7 @@ #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP -#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3 +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || defined(__riscv) || CONFIG == 3 #ifndef FP_FAST_FMA //@#ifndef FP_FAST_FMA #define FP_FAST_FMA From a7f1d93d4d598c707563f87250c3677d217270bf Mon Sep 17 00:00:00 2001 From: Eric Love Date: Fri, 13 Jan 2023 13:01:50 -0800 Subject: [PATCH 02/24] Add support for the RISC-V Vector ISA --- CMakeLists.txt | 2 + Configure.cmake | 7 + src/arch/helperpurec_scalar.h | 2 +- src/arch/helperrvv.h | 1016 ++++++++++++++++++++++++++++ src/common/commonfuncs.h | 6 +- src/common/dd.h | 2 +- src/common/df.h | 2 +- src/libm-tester/iutsimd.c | 16 +- src/libm-tester/tester2simddp.c | 18 +- src/libm-tester/tester2simdsp.c | 18 +- src/libm/CMakeLists.txt | 13 + src/libm/sleeflibm_header.h.org.in | 12 + src/libm/sleefsimddp.c | 27 + src/libm/sleefsimdsp.c | 35 +- travis/toolchain-riscv64.cmake | 9 + 15 files changed, 1175 insertions(+), 10 deletions(-) create mode 100644 src/arch/helperrvv.h create mode 100644 travis/toolchain-riscv64.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index ec9e04e3..40dca676 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,7 @@ set(SLEEF_ALL_SUPPORTED_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1 RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -56,6 +57,7 @@ set(SLEEF_SUPPORTED_LIBM_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1 RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." 
)

diff --git a/Configure.cmake b/Configure.cmake
index 63d2c638..25392d09 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -119,6 +119,10 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.")
   set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+  set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.")
+  set(COMPILER_SUPPORTS_RVVM1 1)
+  set(COMPILER_SUPPORTS_RVVM2 1)
 endif()
 
 set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
@@ -163,6 +167,9 @@ set(CLANG_FLAGS_ENABLE_VXE "-march=z14;-mzvector")
 set(CLANG_FLAGS_ENABLE_VXENOFMA "-march=z14;-mzvector")
 set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector")
 set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector")
+# RISC-V
+set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv")
+set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv")
 
 set(FLAGS_OTHERS "")

diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h
index d8b9c845..fb83b84c 100644
--- a/src/arch/helperpurec_scalar.h
+++ b/src/arch/helperpurec_scalar.h
@@ -54,7 +54,7 @@
 #define ENABLE_FMA_SP
 //@#define ENABLE_FMA_SP
 
-#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3
+#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || defined(__riscv) || CONFIG == 3
 #ifndef FP_FAST_FMA
 //@#ifndef FP_FAST_FMA
 #define FP_FAST_FMA

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
new file mode 100644
index 00000000..fafac723
--- /dev/null
+++ b/src/arch/helperrvv.h
@@ -0,0 +1,1016 @@
+#ifndef HELPERRVV_H
+#define HELPERRVV_H
+
+#if !defined(SLEEF_GENHEADER)
+#include <riscv_vector.h>
+#include "misc.h"
+
+#if defined(VECTLENDP) || defined(VECTLENSP)
+#error VECTLENDP or VECTLENSP already defined
+#endif
+#endif // #if !defined(SLEEF_GENHEADER)
+
+#if CONFIG == 1
+#define ISANAME "RISC-V Vector Extension with Min. VLEN"
+#define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#else
+// Expand CONFIG before stringizing; a bare # does not stringize in an
+// object-like macro, so a two-step helper is needed here.
+#define SLEEF_RVV_STR_(x) #x
+#define SLEEF_RVV_STR(x) SLEEF_RVV_STR_(x)
+#define ISANAME "RISC-V Vector Extension VLEN=2^" SLEEF_RVV_STR(CONFIG)
+#define SLEEF_RVV_VLEN (1 << CONFIG)
+#endif
+
+#ifndef CONFIG
+#error CONFIG macro not defined
+#endif
+
+#define ENABLE_SP
+#define ENABLE_FMA_DP
+#define ENABLE_DP
+
+static INLINE int vavailability_i(int name) { return -1; }
+
+////////////////////////////////////////////////////////////////////////////////
+// RISC-V Vector Types
+////////////////////////////////////////////////////////////////////////////////
+
+// About the RISC-V Vector type translations:
+//
+// Because the single- and double-precision versions of the RVV port have
+// conflicting definitions of the vmask and vopmask types, they can only
+// be defined for at most one precision level in a single translation unit.
+// Any functions that use vmask or vopmask types are thus enabled only by the
+// corresponding ENABLE_RVV_SP or ENABLE_RVV_DP macro guards.
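//
// [Illustration only; an editor's sketch, not part of the original patch.]
// A consumer translation unit picks one LMUL variant and at most one mask
// precision before including this header, which is what the testers added
// later in this series do:
//
//     #define CONFIG 1
//     #define ENABLE_RVVM1    // or ENABLE_RVVM2 for the LMUL=2 variant
//     #define ENABLE_RVV_DP   // never together with ENABLE_RVV_SP
//     #include "helperrvv.h"
//
// With these settings vmask becomes vuint64m1_t and vopmask vbool64_t, and
// the conflicting single-precision definitions below are compiled out.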
+#if defined(ENABLE_RVV_SP) && defined(ENABLE_RVV_DP)
+#error Cannot simultaneously define ENABLE_RVV_SP and ENABLE_RVV_DP
+#endif
+
+#ifdef ENABLE_RVV_SP
+// Types that conflict with ENABLE_RVV_DP
+#ifdef ENABLE_RVVM1
+typedef vuint64m2_t vmask;
+typedef vbool32_t vopmask;
+#else
+typedef vuint64m4_t vmask;
+typedef vbool16_t vopmask;
+#endif
+#endif
+
+#ifdef ENABLE_RVV_DP
+// Types that conflict with ENABLE_RVV_SP
+#ifdef ENABLE_RVVM1
+typedef vuint64m1_t vmask;
+typedef vbool64_t vopmask;
+#else
+typedef vuint64m2_t vmask;
+typedef vbool32_t vopmask;
+#endif
+#endif
+
+// LMUL-Dependent Type & Macro Definitions:
+//
+// Some SLEEF types are multi-value structs. RVV vectors have unknown length at
+// compile time, so they cannot appear in a struct in Clang. They are instead
+// represented as single vectors with "members" packed into the registers of a
+// wide-LMUL register group. In the largest cases (ddi_t and dfi_t), this
+// requires LMUL=8 if the base type (vfloat or vdouble) has LMUL=2, meaning
+// LMUL=2 is currently the widest option for SLEEF function argument types.
+#ifdef ENABLE_RVVM1
+
+typedef vint32mf2_t vint;
+typedef vfloat64m1_t vdouble;
+typedef vfloat64m2_t vdouble2;
+typedef vfloat64m4_t vdouble3;
+typedef vfloat64m4_t dd2;
+typedef vuint64m2_t vquad;
+typedef vint32m2_t di_t;
+typedef vint32m4_t ddi_t;
+typedef vfloat32m1_t vfloat;
+typedef vfloat32m2_t vfloat2;
+typedef vfloat32m4_t df2;
+typedef vint32m1_t vint2;
+typedef vint32m2_t fi_t;
+typedef vint32m4_t dfi_t;
+#define SLEEF_RVV_SP_LMUL 1
+#define SLEEF_RVV_DP_LMUL 1
+#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32)
+#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64)
+#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m1
+#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m1
+#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m1
+#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m1
+#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m2
+#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m1
+#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m2
+#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m4
+#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1
+#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m1
+#define SLEEF_RVV_SP_VGET_VI2 vget_i32m1
+#define SLEEF_RVV_SP_VGET_2VI vget_i32m2
+#define SLEEF_RVV_SP_VGET_VF vget_f32m1
+#define SLEEF_RVV_SP_VGET_VF2 vget_f32m2
+#define SLEEF_RVV_SP_VGET_4VF vget_f32m4
+#define SLEEF_RVV_SP_VGET_VU2 vget_u32m2
+#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m1
+#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m1
+#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m2
+#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1
+#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m1
+#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m1
+#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP)
+#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32mf2
+#define SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m1
+#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m1
+#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m2
+#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \
+  vreinterpret_v_i64m2_i32m2(vreinterpret_i64m2(x))
+#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \
+  vreinterpret_f64m2(vreinterpret_v_i32m2_i64m2(x))
+#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m4
+#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \
vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM vget_u64m1 +#define SLEEF_RVV_DP_VGET_VD vget_f64m1 +#define SLEEF_RVV_DP_VGET_VD2 vget_f64m2 +#define SLEEF_RVV_DP_VGET_4VD vget_f64m2 +#define SLEEF_RVV_DP_VGET_VI vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI vget_i32m1 +#define SLEEF_RVV_DP_VGET_4VI vget_i32m2 +#define SLEEF_RVV_DP_VGET_8VI vget_i32m4 +#define SLEEF_RVV_DP_VGET_VU vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m1 +#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32mf2 + +#else + +typedef vint32m1_t vint; +typedef vfloat64m2_t vdouble; +typedef vfloat64m4_t vdouble2; +typedef vfloat64m8_t vdouble3; +typedef vfloat64m8_t dd2; +typedef vuint64m4_t vquad; +typedef vint32m4_t di_t; +typedef vint32m8_t ddi_t; +typedef vfloat32m2_t vfloat; +typedef vfloat32m4_t vfloat2; +typedef vfloat32m8_t df2; +typedef vint32m2_t vint2; +typedef vint32m4_t fi_t; +typedef vint32m8_t dfi_t; +#define SLEEF_RVV_SP_LMUL 2 +#define SLEEF_RVV_DP_LMUL 2 +#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32) +#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64) +#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m2 +#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m2 +#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m8 +#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m2 +#define SLEEF_RVV_SP_VGET_VI2 vget_i32m2 +#define SLEEF_RVV_SP_VGET_2VI vget_i32m4 +#define SLEEF_RVV_SP_VGET_VF vget_f32m2 +#define SLEEF_RVV_SP_VGET_VF2 vget_f32m4 +#define SLEEF_RVV_SP_VGET_4VF vget_f32m8 +#define SLEEF_RVV_SP_VGET_VU2 vget_u32m4 +#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m2 +#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m2 +#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 +#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m2 +#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m2 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32m1 +#define SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m4 +#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ + vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) 
\ + vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m8 +#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ + vreinterpret_f64m8(vreinterpret_v_i32m8_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + vreinterpret_v_i64m8_i32m8(vreinterpret_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM vget_u64m2 +#define SLEEF_RVV_DP_VGET_VD vget_f64m2 +#define SLEEF_RVV_DP_VGET_VD2 vget_f64m4 +#define SLEEF_RVV_DP_VGET_4VD vget_f64m4 +#define SLEEF_RVV_DP_VGET_VI vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI vget_i32m2 +#define SLEEF_RVV_DP_VGET_4VI vget_i32m4 +#define SLEEF_RVV_DP_VGET_8VI vget_i32m8 +#define SLEEF_RVV_DP_VGET_VU vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m2 +#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32m1 + +#endif // ENABLE_RVVM1 + +//////////////////////////////////////////////////////////////////////////////// +// Single-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// fi type +static INLINE vfloat figetd_vf_di(fi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF(SLEEF_RVV_SP_VGET_VI2(d, 0)); +} +static INLINE vint2 figeti_vi2_di(fi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 1); +} +static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { + fi_t res; + res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); + res = vset(res, 1, i); + return res; +} +static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF2(SLEEF_RVV_SP_VGET_2VI(d, 0)); +} +static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 2); +} +static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { + dfi_t res; + res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + res = vset(res, 2, i); + return res; +} +static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { + return vset(dfi, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); +} +// vfloat2 type +static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 0); +} +static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 1); +} +static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { + vfloat2 res; + res = vset(res, 0, x); + res = vset(res, 1, y); + return res; +} +static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { + return vset(v, 0, d); +} +static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { + return vset(v, 1, d); +} +// df2 type +static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { + df2 res; + res = vset(res, 0, a); + res = vset(res, 1, b); + return res; +} +static vfloat2 df2geta_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 0); } +static vfloat2 df2getb_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 1); } +static INLINE 
vint2 vreinterpret_vi2_vf(vfloat vf) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(vf); +} +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { + return SLEEF_RVV_SP_VREINTERPRET_VF(vi); +} + + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vfloat vcast_vf_f(float f) { + return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP); +} +static INLINE vfloat vrint_vf_vf(vfloat vd) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); + half = vfsgnj(half, vd, VECTLENSP); + vfloat res = vfadd(vd, half, VECTLENSP); + vint2 i = vfcvt_rtz_x(res, VECTLENSP); + return vfcvt_f(i, VECTLENSP); +} +static INLINE vfloat vcast_vf_vi2(vint2 vi) { + return vfcvt_f(vi, VECTLENSP); +} +static INLINE vint2 vcast_vi2_i(int i) { + return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP); +} +static INLINE vint2 vrint_vi2_vf(vfloat vf) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); + half = vfsgnj(half, vf, VECTLENSP); + vfloat res = vfadd(vf, half, VECTLENSP); + return vfcvt_rtz_x(res, VECTLENSP); +} +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { + return vfcvt_rtz_x(vf, VECTLENSP); +} +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { + return vcast_vf_vi2(vtruncate_vi2_vf(vf)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vfloat vload_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE vfloat vloadu_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { + vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { + vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { + vse32(ptr, v, VECTLENSP); +} +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + return vluxei32(ptr, vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { + return vfadd(x, y, VECTLENSP); +} +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { + return vfsub(x, y, VECTLENSP); +} +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { + return vfmul(x, y, VECTLENSP); +} +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { + return vfdiv(x, y, VECTLENSP); +} +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { + return vfmax(x, y, VECTLENSP); +} +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { + return vfmin(x, y, VECTLENSP); +} +static INLINE vfloat vrec_vf_vf(vfloat d) { + return vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); +} +static INLINE vfloat vsqrt_vf_vf(vfloat d) { + return vfsqrt(d, VECTLENSP); +} +// fused multiply-add/subtract +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vfmadd(x, y, z, VECTLENSP); +} +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return vfnmsub(x, y, z, VECTLENSP); +} +// sign manipulation +static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { + return vfsgnjx(x, y, VECTLENSP); +} +static INLINE vfloat vcopysign_vf_vf_vf(vfloat x, 
vfloat y) { + return vfsgnj(x, y, VECTLENSP); +} +static INLINE vfloat vsign_vf_vf(vfloat f) { + return vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); +} +static INLINE vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { + vint2 xi = SLEEF_RVV_SP_VREINTERPRET_VI2(x); + vint2 yi = SLEEF_RVV_SP_VREINTERPRET_VI2(y); + vint2 xioryi = vor(xi, yi, VECTLENSP); + vfloat xory = SLEEF_RVV_SP_VREINTERPRET_VF(xioryi); + return vfsgnj(x, xory, VECTLENSP); +} +static INLINE vfloat vabs_vf_vf(vfloat f) { + return vfabs(f, VECTLENSP); +} +static INLINE vfloat vneg_vf_vf(vfloat f) { + return vfneg(f, VECTLENSP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + return vadd(x, y, VECTLENSP); +} +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + return vsub(x, y, VECTLENSP); +} +static INLINE vint2 vneg_vi2_vi2(vint2 x) { + return vneg(x, VECTLENSP); +} +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + return vand(x, y, VECTLENSP); +} +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + return vand(vnot(x, VECTLENSP), y, VECTLENSP); +} +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + return vor(x, y, VECTLENSP); +} +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + return vxor(x, y, VECTLENSP); +} +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { + return vsll(x, c, VECTLENSP); +} +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { + return vsra(x, c, VECTLENSP); +} +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); +} + +#ifdef ENABLE_RVV_SP +/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { + return SLEEF_RVV_SP_VREINTERPRET_VF(vncvt_x(vm, VECTLENSP)); +} +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { + return vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); +} +static INLINE int vtestallones_i_vo32(vopmask g) { + return vcpop(g, VECTLENSP) == VECTLENSP; +} +static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { + return SLEEF_RVV_SP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENSP); +} +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return vand(x, y, VECTLENSP); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return vor(x, y, VECTLENSP); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return vxor(x, y, VECTLENSP); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return vand(SLEEF_RVV_SP_VREINTERPRET_VM(vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); +} +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { + return vmerge(x, y, -1, VECTLENSP); +} +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { + return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); +} +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { + return vmerge(x, y, 0, VECTLENSP); +} + + +/****************************************/ +/* Logical Mask Operations */ +/****************************************/ +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return vmand(x, y, VECTLENSP); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return vmandn(y, x, VECTLENSP); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return vmor(x, y, VECTLENSP); +} +static INLINE vopmask 
vxor_vo_vo_vo(vopmask x, vopmask y) { + return vmxor(x, y, VECTLENSP); +} +// single precision FP comparison +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { + return vmfeq(x, y, VECTLENSP); +} +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { + return vmfne(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { + return vmfgt(x, y, VECTLENSP); +} +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { + return vmfge(x, y, VECTLENSP); +} +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { + return vmflt(x, y, VECTLENSP); +} +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { + return vmfle(x, y, VECTLENSP); +} +static INLINE vopmask visnan_vo_vf(vfloat d) { + return vmfne(d, d, VECTLENSP); +} +static INLINE vopmask visinf_vo_vf(vfloat d) { + return vmfeq(vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); +} +static INLINE vopmask vispinf_vo_vf(vfloat d) { + return vmfeq(d, SLEEF_INFINITYf, VECTLENSP); +} +// conditional select +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return vmerge(mask, y, x, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_f_f(vopmask mask, float v1, float v0) { + return vfmerge(mask, vcast_vf_f(v0), v1, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return vfmerge(o0, vfmerge(o1, vcast_vf_f(d2), d1, VECTLENSP), d0, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { + return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vf_f(d3), d2, VECTLENSP), d1, VECTLENSP), d0, VECTLENSP); +} +// integer comparison +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + return vmseq(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + return vmsgt(x, y, VECTLENSP); +} +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 zero = vcast_vi2_i(0); + return vmerge(vmsgt(x, y, VECTLENSP), zero, -1, VECTLENSP); +} +// integer conditional select +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return vmerge(m, y, x, VECTLENSP); +} +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { + return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); +} +#endif // ENABLE_RVV_SP + + +//////////////////////////////////////////////////////////////////////////////// +// Double-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// vdouble2 type +static INLINE const vdouble vd2getx_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 0); +} +static INLINE const vdouble vd2gety_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 1); +} +static INLINE const vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { + vdouble2 res; + res = vset(res, 0, x); + res = vset(res, 1, y); + return res; +} +static INLINE const vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { + return vset(v, 0, d); +} +static INLINE const vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { + return vset(v, 1, d); +} +// dd2 type +static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { + dd2 res; + res = vset(res, 0, a); + res = vset(res, 1, b); + return res; +} +static vdouble2 dd2geta_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 0); } +static vdouble2 dd2getb_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 1); } +// vdouble3 type +static 
INLINE vdouble vd3getx_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 0); } +static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 1); } +static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 2); } +static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + vdouble3 res; + res = vset(res, 0, x); + res = vset(res, 1, y); + res = vset(res, 2, z); + return res; +} +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 2, d); } +// di type +static INLINE vdouble digetd_vd_di(di_t d) { + return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); +} +static INLINE vint digeti_vi_di(di_t d) { +#ifdef ENABLE_RVVM1 + return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); +#else + return SLEEF_RVV_DP_VGET_VI(d, 2); +#endif +} +static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { + di_t res; + res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); +#ifdef ENABLE_RVVM1 + res = vset(res, 1, vlmul_ext_i32m1(i)); +#else + res = vset(res, 2, i); +#endif + return res; +} +// ddi type +static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { + return SLEEF_RVV_DP_VGET_VD2(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(d), 0); +} +static INLINE vint ddigeti_vi_ddi(ddi_t d) { +#ifdef ENABLE_RVVM1 + return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); +#else + return SLEEF_RVV_DP_VGET_VI(d, 4); +#endif +} +static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { + ddi_t res; + res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); +#ifdef ENABLE_RVVM1 + res = vset(res, 2, vlmul_ext_i32m1(i)); +#else + res = vset(res, 4, i); +#endif + return res; +} +static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { + return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); +} + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vdouble vcast_vd_d(double d) { + return SLEEF_RVV_DP_VCAST_VD_D(d, VECTLENDP); +} +static INLINE vdouble vcast_vd_vi(vint i) { + return SLEEF_RVV_DP_VCAST_VD_VI(i); +} +static INLINE vint vcast_vi_i(int32_t i) { + return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP); +} +static INLINE vint vrint_vi_vd(vdouble vd) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); + half = vfsgnj(half, vd, VECTLENDP); + vdouble res = vfadd(vd, half, VECTLENDP); + return vfncvt_rtz_x(res, VECTLENDP); +} +static INLINE vdouble vrint_vd_vd(vdouble vd) { + // It is not currently possible to safely set frm for intrinsics, + // so emulate round-to-nearest behavior + vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); + half = vfsgnj(half, vd, VECTLENDP); + vdouble res = vfadd(vd, half, VECTLENDP); + return vfwcvt_f(vfncvt_rtz_x(res, VECTLENDP), VECTLENDP); +} +static INLINE vint vtruncate_vi_vd(vdouble vd) { + return vfncvt_rtz_x(vd, VECTLENDP); +} +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { + return vcast_vd_vi(vtruncate_vi_vd(vd)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vdouble vload_vd_p(const 
double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vdouble vloadu_vd_p(const double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vint vloadu_vi_p(int32_t *p) { + return SLEEF_RVV_DP_LOAD_VI(p, VECTLENDP); +} +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { + vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { + vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vi(int32_t *ptr, vint v) { + vse32(ptr, v, VECTLENDP); +} +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + return vluxei64(ptr, vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), sizeof(double), VECTLENDP), VECTLENDP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { + return vfadd(x, y, VECTLENDP); +} +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { + return vfsub(x, y, VECTLENDP); +} +static INLINE vdouble vrec_vd_vd(vdouble d) { + return vfdiv(vcast_vd_d(1.0), d, VECTLENDP); +} +static INLINE vdouble vabs_vd_vd(vdouble d) { + return vfabs(d, VECTLENDP); +} +static INLINE vdouble vsqrt_vd_vd(vdouble d) { + return vfsqrt(d, VECTLENDP); +} +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { + return vfmul(x, y, VECTLENDP); +} +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { + return vfdiv(x, y, VECTLENDP); +} +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { + return vfmax(x, y, VECTLENDP); +} +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { + return vfmin(x, y, VECTLENDP); +} +// fused multiply add / sub +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmadd(x, y, z, VECTLENDP); +} +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmsub(x, y, z, VECTLENDP); +} +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmadd(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfnmsub(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return vfmsub(x, y, z, VECTLENDP); +} +// sign manipulation +static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { + return vfsgnjx(x, y, VECTLENDP); +} +static INLINE vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { + return vfsgnj(x, y, VECTLENDP); +} +static INLINE vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { + return vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); +} +static INLINE vdouble vneg_vd_vd(vdouble d) { + return vfneg(d, VECTLENDP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { + return vadd(x, y, VECTLENDP); +} +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { + return vsub(x, y, VECTLENDP); +} +static INLINE vint vneg_vi_vi(vint x) { + return vneg(x, VECTLENDP); +} +static INLINE vint vand_vi_vi_vi(vint x, vint y) { + return vand(x, y, VECTLENDP); +} +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { + return vand(vnot(x, VECTLENDP), y, VECTLENDP); +} +static INLINE vint vor_vi_vi_vi(vint x, vint y) { + return vor(x, y, VECTLENDP); +} +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { + return vxor(x, y, VECTLENDP); +} 
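/* [Illustration only; an editor's sketch, not part of the original patch.]
   A scalar model of the rounding emulation used by vrint_vi_vd / vrint_vd_vd
   above: since the dynamic rounding mode (frm) cannot be changed safely
   around the intrinsics, a 0.5 that carries the sign of the input is added
   before truncating toward zero. For in-range inputs this rounds halfway
   cases away from zero (2.5 -> 3, -2.5 -> -3) rather than to nearest even.
   The helper name below is hypothetical. */
static INLINE int64_t sleef_rvv_rint_model(double x) {
  double half = __builtin_copysign(0.5, x); // mirrors vfsgnj(half, vd, VECTLENDP)
  return (int64_t)(x + half);               // the C cast truncates, like vfncvt_rtz_x
}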
+static INLINE vint vsll_vi_vi_i(vint x, int c) { + return vsll(x, c, VECTLENDP); +} +static INLINE vint vsra_vi_vi_i(vint x, int c) { + return vsra(x, c, VECTLENDP); +} +static INLINE vint vsrl_vi_vi_i(vint x, int c) { + return SLEEF_RVV_DP_VREINTERPRET_VI(vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); +} + + +#ifdef ENABLE_RVV_DP +/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vmask vcast_vm_i64(int64_t c) { + return SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP); +} +static INLINE vmask vcast_vm_u64(uint64_t c) { + return SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP); +} +static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { + return SLEEF_RVV_DP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENDP); +} +static INLINE vmask vcast_vm_vi(vint vi) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)); +} +static INLINE vmask vcastu_vm_vi(vint vi) { + return vsll(SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP); +} +static INLINE vint vcastu_vi_vm(vmask vm) { + return SLEEF_RVV_DP_VREINTERPRET_VI(vnsrl(vm, 32, VECTLENDP)); +} +static INLINE vint vcast_vi_vm(vmask vm) { + return SLEEF_RVV_DP_VREINTERPRET_VI(vncvt_x(vm, VECTLENDP)); +} +static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { + return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); +} +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return vand(x, y, VECTLENDP); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return vor(x, y, VECTLENDP); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return vxor(x, y, VECTLENDP); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return vand(SLEEF_RVV_DP_VREINTERPRET_VM(vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP); +} +static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { + return vmerge(x, y, 0, VECTLENDP); +} +static INLINE vmask vsll64_vm_vm_i(vmask mask, int64_t c) { + return vsll(mask, c, VECTLENDP); +} +static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP)); +} +static INLINE vmask vsrl64_vm_vm_i(vmask mask, int64_t c) { + return vsrl(mask, c, VECTLENDP); +} +static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { + return vadd(x, y, VECTLENDP); +} +static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { + return vmerge(x, y, -1, VECTLENDP); +} +static INLINE vmask vsel_vm_vo64_vm_vm(vopmask mask, vmask x, vmask y) { + return vmerge(mask, y, x, VECTLENDP); +} +static INLINE vmask vneg64_vm_vm(vmask mask) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP)); +} +static INLINE vdouble vreinterpret_vd_vm(vmask vm) { + return SLEEF_RVV_DP_VREINTERPRET_VD(vm); +} +static INLINE vmask vreinterpret_vm_vd(vdouble vd) { + return SLEEF_RVV_DP_VREINTERPRET_VM(vd); +} + +// vquad type +static INLINE const vmask vqgetx_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 0); } +static INLINE const vmask vqgety_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 1); } +static INLINE vquad vqsetxy_vq_vm_vm(vmask x, vmask y) { + vquad res; + res = vset(res, 0, x); + res = vset(res, 1, y); + return res; +} +static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return vset(v, 0, x); } +static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return vset(v, 1, y); } + + + +/****************************************/ +/* Logical Mask Operations */ 
+/****************************************/ +static INLINE vopmask vcast_vo64_vo32(vopmask vo) { + return vo; +} +static INLINE vopmask vcast_vo32_vo64(vopmask vo) { + return vo; +} +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return vmand(x, y, VECTLENDP); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return vmandn(y, x, VECTLENDP); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return vmor(x, y, VECTLENDP); +} +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { + return vmxor(x, y, VECTLENDP); +} +static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { + return vmseq(x, y, VECTLENDP); +} +static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { + return vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP); +} +// double-precision comparison +static INLINE vopmask visinf_vo_vd(vdouble d) { + return vmfeq(vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP); +} +static INLINE vopmask vispinf_vo_vd(vdouble d) { + return vmfeq(d, SLEEF_INFINITY, VECTLENDP); +} +static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { + return vmfeq(x, y, VECTLENDP); +} +static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { + return vmfne(x, y, VECTLENDP); +} +static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { + return vmflt(x, y, VECTLENDP); +} +static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { + return vmfle(x, y, VECTLENDP); +} +static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { + return vmfgt(x, y, VECTLENDP); +} +static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { + return vmfge(x, y, VECTLENDP); +} +static INLINE vopmask visnan_vo_vd(vdouble d) { + return vmfne(d, d, VECTLENDP); +} +// double-precision conditional select +static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { + return vmerge(mask, y, x, VECTLENDP); +} +static INLINE vdouble vsel_vd_vo_d_d(vopmask mask, double v0, double v1) { + return vfmerge(mask, vcast_vd_d(v1), v0, VECTLENDP); +} +static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { + return vfmerge(o0, vfmerge(o1, vcast_vd_d(d2), d1, VECTLENDP), d0, VECTLENDP); +} +static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { + return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vd_d(d3), d2, VECTLENDP), d1, VECTLENDP), d0, VECTLENDP); +} +static INLINE int vtestallones_i_vo64(vopmask g) { + return vcpop(g, VECTLENDP) == VECTLENDP; +} +// integer comparison +static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { + return vmseq(x, y, VECTLENDP); +} +static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { + return vmsgt(x, y, VECTLENDP); +} +static INLINE vint vgt_vi_vi_vi(vint x, vint y) { + vint zero = vcast_vi_i(0); + return vmerge(vmsgt(x, y, VECTLENDP), zero, -1, VECTLENDP); +} +// integer conditional select +static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { + return vmerge(m, y, x, VECTLENDP); +} +static INLINE vint vandnot_vi_vo_vi(vopmask mask, vint vi) { + return vmerge(mask, vi, 0, VECTLENDP); +} +static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { + return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); +} +#endif // ENABLE_RVV_DP + +#endif // HELPERRVV_H diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h index 2f1a0da9..aff782df 100644 --- a/src/common/commonfuncs.h +++ b/src/common/commonfuncs.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) typedef struct { vdouble x, y, z; } vdouble3; @@ -210,14 +210,17 @@ static INLINE CONST VECTOR_CC vdouble vtoward0_vd_vd(vdouble x) { // returns nex return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t); } +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } +#endif static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) { return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); } +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } @@ -226,6 +229,7 @@ static INLINE CONST VECTOR_CC vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)), vand_vm_vm_vm (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y)))); } +#endif static INLINE CONST VECTOR_CC vdouble vtruncate2_vd_vd(vdouble x) { #ifdef FULL_FP_ROUNDING diff --git a/src/common/dd.h b/src/common/dd.h index b1423556..89af2e87 100644 --- a/src/common/dd.h +++ b/src/common/dd.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) #if !defined(ENABLE_CUDA) typedef struct { vdouble x, y; diff --git a/src/common/df.h b/src/common/df.h index 4e3e7949..0883b227 100644 --- a/src/common/df.h +++ b/src/common/df.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) #if !defined(ENABLE_CUDA) typedef struct { vfloat x, y; diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 002cb0f1..90353586 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -343,6 +343,18 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif +#ifdef ENABLE_RVVM1 +#define CONFIG 1 +#include "helperrvv.h" +#include "renamervvm1.h" +#endif + +#ifdef ENABLE_RVVM2 +#define CONFIG 1 +#include "helperrvv.h" +#include "renamervvm2.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #include "renamepurec_scalar.h" #if !defined(USE_INLINE_HEADER) @@ -426,12 +438,12 @@ int check_feature(double d, float f) { return 0; } -#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || defined(USE_INLINE_HEADER)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif -#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || 
defined(USE_INLINE_HEADER)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c index 540d1142..9d723868 100644 --- a/src/libm-tester/tester2simddp.c +++ b/src/libm-tester/tester2simddp.c @@ -191,6 +191,22 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif +#ifdef ENABLE_RVVM1 +#define CONFIG 1 +#define ENABLE_RVV_DP +#include "helperrvv.h" +#include "renamervvm1.h" +#include "sleef.h" +#endif + +#ifdef ENABLE_RVVM2 +#define CONFIG 1 +#define ENABLE_RVV_DP +#include "helperrvv.h" +#include "renamervvm2.h" +#include "sleef.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" @@ -209,7 +225,7 @@ typedef Sleef_float_2 vfloat2; // -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c index d140ba4b..d83e8b4b 100644 --- a/src/libm-tester/tester2simdsp.c +++ b/src/libm-tester/tester2simdsp.c @@ -191,6 +191,22 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif +#ifdef ENABLE_RVVM1 +#define CONFIG 1 +#define ENABLE_RVV_SP +#include "helperrvv.h" +#include "renamervvm1.h" +#include "sleef.h" +#endif + +#ifdef ENABLE_RVVM2 +#define CONFIG 1 +#define ENABLE_RVV_SP +#include "helperrvv.h" +#include "renamervvm2.h" +#include "sleef.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" @@ -209,7 +225,7 @@ typedef Sleef_float_2 vfloat2; // -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index fe0a5d39..352383b1 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -60,6 +60,13 @@ elseif(SLEEF_ARCH_S390X) PURECFMA_SCALAR DSP_SCALAR ) +elseif(SLEEF_ARCH_RISCV64) + set(SLEEF_HEADER_LIST + RVVM1 + RVVM2 + PUREC_SCALAR + PURECFMA_SCALAR + ) endif() # HEADER_PARAMS @@ -98,6 +105,9 @@ command_arguments(HEADER_PARAMS_VXENOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2) command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma) +command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1) +command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2) + command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__) command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec) command_arguments(HEADER_PARAMS_PURECFMA_SCALAR finz_ 1 1 double float int32_t int32_t __STDC__ purecfma) @@ -144,6 +154,9 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3 # the "x" token of VLA SVE vector 
functions.
 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)
 
+command_arguments(RENAME_PARAMS_RVVM1 finz_ x x m1)
+command_arguments(RENAME_PARAMS_RVVM2 finz_ x x m2)
+
 # ALIAS_PARAMS
 
 command_arguments(ALIAS_PARAMS_AVX512F_DP 8 __m512d __m256i e avx512f)

diff --git a/src/libm/sleeflibm_header.h.org.in b/src/libm/sleeflibm_header.h.org.in
index d637b60e..89b3a1ca 100644
--- a/src/libm/sleeflibm_header.h.org.in
+++ b/src/libm/sleeflibm_header.h.org.in
@@ -131,6 +131,18 @@ SLEEF_IMPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx);
 
 //
 
+#if defined(__riscv_v)
+#include <riscv_vector.h>
+typedef vfloat64m2_t Sleef_vfloat64m1_t_2;
+typedef vfloat32m2_t Sleef_vfloat32m1_t_2;
+typedef vfloat64m4_t Sleef_vfloat64m2_t_2;
+typedef vfloat32m4_t Sleef_vfloat32m2_t_2;
+#define Sleef_vfloat64m1_t_2_DEFINED
+#define Sleef_vfloat32m1_t_2_DEFINED
+#define Sleef_vfloat64m2_t_2_DEFINED
+#define Sleef_vfloat32m2_t_2_DEFINED
+#endif
+
 #ifndef Sleef_double2_DEFINED
 #define Sleef_double2_DEFINED
 typedef struct {

diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c
index e531495f..68c13a34 100644
--- a/src/libm/sleefsimddp.c
+++ b/src/libm/sleefsimddp.c
@@ -221,6 +221,33 @@ extern const double Sleef_rempitabdp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
 // Generic
 
 #ifdef ENABLE_VECEXT

diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c
index 9e1faa23..5ec5a082 100644
--- a/src/libm/sleefsimdsp.c
+++ b/src/libm/sleefsimdsp.c
@@ -321,6 +321,33 @@ extern const float Sleef_rempitabsp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
 // Generic
 
 #ifdef ENABLE_VECEXT
@@ -401,6 +428,7 @@ static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) {
   return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f)));
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2))
 static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) {
   return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
 }
@@ -413,6 +441,7 @@ static INLINE CONST VECTOR_CC vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) {
 static INLINE CONST VECTOR_CC vfloat vsign_vf_vf(vfloat f) {
   return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vf(vfloat d) {
   return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000));
@@ -487,7 +516,7 @@ static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) {
 
 EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return
vldexp_vf_vf_vi2(x, q); } -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) typedef struct { vfloat d; vint2 i; @@ -517,9 +546,11 @@ static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { } #endif +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) static INLINE CONST VECTOR_CC vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); } +#endif static INLINE CONST fi_t rempisubf(vfloat x) { #ifdef FULL_FP_ROUNDING @@ -3290,7 +3321,7 @@ EXPORT CONST VECTOR_CC vfloat xcospif_u05(vfloat d) { } #endif // #if !defined(DETERMINISTIC) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) typedef struct { vfloat2 a, b; } df2; diff --git a/travis/toolchain-riscv64.cmake b/travis/toolchain-riscv64.cmake new file mode 100644 index 00000000..bb7b4977 --- /dev/null +++ b/travis/toolchain-riscv64.cmake @@ -0,0 +1,9 @@ +set(CMAKE_CROSSCOMPILING TRUE) +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_PROCESSOR "riscv64") + +find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-clang) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From 46230a3a1ddf25e55171bc531243fc2c4f018ddc Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Thu, 2 Nov 2023 14:45:27 +0000 Subject: [PATCH 03/24] RIVOS: update rvv support to latest intrinsics - intrinsic functions are now prefixed with __riscv_ - vmerge/vfmerge argument order has changed --- src/arch/helperrvv.h | 612 +++++++++++++++++++++---------------------- 1 file changed, 306 insertions(+), 306 deletions(-) diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h index fafac723..18fe7642 100644 --- a/src/arch/helperrvv.h +++ b/src/arch/helperrvv.h @@ -93,68 +93,68 @@ typedef vint32m4_t dfi_t; #define SLEEF_RVV_DP_LMUL 1 #define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32) #define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64) -#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m1 -#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m1 -#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m1 -#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m1 -#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m1 -#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m2 -#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m4 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1 -#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m1 -#define SLEEF_RVV_SP_VGET_VI2 vget_i32m1 -#define SLEEF_RVV_SP_VGET_2VI vget_i32m2 -#define SLEEF_RVV_SP_VGET_VF vget_f32m1 -#define SLEEF_RVV_SP_VGET_VF2 vget_f32m2 -#define SLEEF_RVV_SP_VGET_4VF vget_f32m4 -#define SLEEF_RVV_SP_VGET_VU2 vget_u32m2 -#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m1 -#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m1 -#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m2 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m1 -#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m1 -#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m1 -#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP) -#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32mf2 -#define 
SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m2 +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m1 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m1 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m2 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m1 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m1 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m1 +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m1 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32mf2 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m2 #define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ - vreinterpret_v_i64m2_i32m2(vreinterpret_i64m2(x)) + __riscv_vreinterpret_v_i64m2_i32m2(__riscv_vreinterpret_i64m2(x)) #define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ - vreinterpret_f64m2(vreinterpret_v_i32m2_i64m2(x)) -#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m4 + __riscv_vreinterpret_f64m2(__riscv_vreinterpret_v_i32m2_i64m2(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m4 #define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ - vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) #define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ - vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) -#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m1 -#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32mf2 -#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 -#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 -#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 -#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32mf2 -#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 -#define SLEEF_RVV_DP_VGET_VM vget_u64m1 -#define SLEEF_RVV_DP_VGET_VD vget_f64m1 -#define SLEEF_RVV_DP_VGET_VD2 vget_f64m2 -#define SLEEF_RVV_DP_VGET_4VD vget_f64m2 -#define SLEEF_RVV_DP_VGET_VI vget_i32m1 -#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 -#define 
SLEEF_RVV_DP_VGET_2VI vget_i32m1 -#define SLEEF_RVV_DP_VGET_4VI vget_i32m2 -#define SLEEF_RVV_DP_VGET_8VI vget_i32m4 -#define SLEEF_RVV_DP_VGET_VU vget_u32m1 -#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m1 -#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32mf2 + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m1 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m1 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2 #else @@ -176,68 +176,68 @@ typedef vint32m8_t dfi_t; #define SLEEF_RVV_DP_LMUL 2 #define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32) #define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64) -#define SLEEF_RVV_SP_VCAST_VF_F vfmv_v_f_f32m2 -#define SLEEF_RVV_SP_VCAST_VI2_I vmv_v_x_i32m2 -#define SLEEF_RVV_SP_VCAST_VU2_U vmv_v_x_u32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VF vreinterpret_f32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VF2 vreinterpret_f32m4 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VI2 vreinterpret_i32m2 -#define SLEEF_RVV_SP_VREINTERPRET_2VI vreinterpret_i32m4 -#define SLEEF_RVV_SP_VREINTERPRET_4VI vreinterpret_i32m8 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 -#define SLEEF_RVV_SP_VREINTERPRET_VU2 vreinterpret_u32m2 -#define SLEEF_RVV_SP_VGET_VI2 vget_i32m2 -#define SLEEF_RVV_SP_VGET_2VI vget_i32m4 -#define SLEEF_RVV_SP_VGET_VF vget_f32m2 -#define SLEEF_RVV_SP_VGET_VF2 vget_f32m4 -#define SLEEF_RVV_SP_VGET_4VF vget_f32m8 -#define SLEEF_RVV_SP_VGET_VU2 vget_u32m4 -#define SLEEF_RVV_SP_LOAD_VF vle32_v_f32m2 -#define SLEEF_RVV_SP_LOAD_VI2 vle32_v_i32m2 -#define SLEEF_RVV_SP_VCAST_VM_U vmv_v_x_u64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VM vreinterpret_u64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VI64 vreinterpret_i64m4 -#define SLEEF_RVV_SP_VREINTERPRET_VU vreinterpret_u32m2 -#define SLEEF_RVV_SP_LOAD_VI vle32_v_i32m2 -#define SLEEF_RVV_DP_VCAST_VD_D vfmv_v_f_f64m2 -#define SLEEF_RVV_DP_VCAST_VD_VI(x) vfwcvt_f(x, VECTLENDP) -#define SLEEF_RVV_DP_VCAST_VI_I vmv_v_x_i32m1 -#define SLEEF_RVV_DP_VCAST_VM_U vmv_v_x_u64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VD vreinterpret_f64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VD2 vreinterpret_f64m4 +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m2 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m2 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m2 +#define 
SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m4 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m8 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m4 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m2 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m2 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m2 +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m2 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m4 #define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ - vreinterpret_v_i64m4_i32m4(vreinterpret_i64m4(x)) + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) #define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ - vreinterpret_f64m4(vreinterpret_v_i32m4_i64m4(x)) -#define SLEEF_RVV_DP_VREINTERPRET_4VD vreinterpret_f64m8 + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m8 #define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ - vreinterpret_f64m8(vreinterpret_v_i32m8_i64m8(x)) + __riscv_vreinterpret_f64m8(__riscv_vreinterpret_v_i32m8_i64m8(x)) #define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ - vreinterpret_v_i64m8_i32m8(vreinterpret_i64m8(x)) -#define SLEEF_RVV_DP_VREINTERPRET_VM vreinterpret_u64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VI64 vreinterpret_i64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VU64 vreinterpret_u64m2 -#define SLEEF_RVV_DP_VREINTERPRET_VI vreinterpret_i32m1 -#define SLEEF_RVV_DP_VREINTERPRET_VI2 vreinterpret_i32m1 -#define SLEEF_RVV_DP_VREINTERPRET_2VI vreinterpret_i32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VI vreinterpret_i32m4 -#define SLEEF_RVV_DP_VREINTERPRET_8VI vreinterpret_i32m8 -#define SLEEF_RVV_DP_VREINTERPRET_VU vreinterpret_u32m1 -#define SLEEF_RVV_DP_VREINTERPRET_2VU vreinterpret_u32m2 -#define SLEEF_RVV_DP_VREINTERPRET_4VU vreinterpret_u32m4 -#define SLEEF_RVV_DP_VGET_VM vget_u64m2 -#define SLEEF_RVV_DP_VGET_VD vget_f64m2 -#define SLEEF_RVV_DP_VGET_VD2 vget_f64m4 -#define SLEEF_RVV_DP_VGET_4VD vget_f64m4 -#define SLEEF_RVV_DP_VGET_VI vget_i32m1 -#define SLEEF_RVV_DP_VGET_VI2 vget_i32m1 -#define SLEEF_RVV_DP_VGET_2VI vget_i32m2 -#define SLEEF_RVV_DP_VGET_4VI vget_i32m4 -#define SLEEF_RVV_DP_VGET_8VI vget_i32m8 -#define SLEEF_RVV_DP_VGET_VU vget_u32m1 -#define SLEEF_RVV_DP_LOAD_VD vle64_v_f64m2 -#define SLEEF_RVV_DP_LOAD_VI vle32_v_i32m1 + __riscv_vreinterpret_v_i64m8_i32m8(__riscv_vreinterpret_i64m8(x)) +#define 
SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m2 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m8 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1 #endif // ENABLE_RVVM1 @@ -257,8 +257,8 @@ static INLINE vint2 figeti_vi2_di(fi_t d) { } static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { fi_t res; - res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); - res = vset(res, 1, i); + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); + res = __riscv_vset(res, 1, i); return res; } static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { @@ -269,12 +269,12 @@ static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { } static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { dfi_t res; - res = vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); - res = vset(res, 2, i); + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + res = __riscv_vset(res, 2, i); return res; } static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { - return vset(dfi, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + return __riscv_vset(dfi, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); } // vfloat2 type static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { @@ -285,21 +285,21 @@ static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { } static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 res; - res = vset(res, 0, x); - res = vset(res, 1, y); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); return res; } static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { - return vset(v, 0, d); + return __riscv_vset(v, 0, d); } static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { - return vset(v, 1, d); + return __riscv_vset(v, 1, d); } // df2 type static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { df2 res; - res = vset(res, 0, a); - res = vset(res, 1, b); + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); return res; } static vfloat2 df2geta_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 0); } @@ -322,13 +322,13 @@ static INLINE vfloat vrint_vf_vf(vfloat vd) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); - half = vfsgnj(half, vd, VECTLENSP); - vfloat res = vfadd(vd, half, VECTLENSP); - vint2 i = vfcvt_rtz_x(res, VECTLENSP); - return vfcvt_f(i, VECTLENSP); + half = __riscv_vfsgnj(half, vd, VECTLENSP); + vfloat 
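/* A scalar model of this add-half-and-truncate sequence (illustrative
 * sketch only; rint_emulated is a hypothetical helper, copysignf is from
 * <math.h>):
 *
 *   float rint_emulated(float x) {
 *     float half = copysignf(0.5f, x);     // vfsgnj
 *     return (float)(int32_t)(x + half);   // vfcvt_rtz_x, then vfcvt_f
 *   }
 *
 * Unlike frm-controlled rounding, this resolves ties away from zero:
 * 2.5f + 0.5f == 3.0f truncates to 3, where round-to-nearest-even would
 * give 2. */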
res = __riscv_vfadd(vd, half, VECTLENSP); + vint2 i = __riscv_vfcvt_rtz_x(res, VECTLENSP); + return __riscv_vfcvt_f(i, VECTLENSP); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { - return vfcvt_f(vi, VECTLENSP); + return __riscv_vfcvt_f(vi, VECTLENSP); } static INLINE vint2 vcast_vi2_i(int i) { return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP); @@ -337,12 +337,12 @@ static INLINE vint2 vrint_vi2_vf(vfloat vf) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP); - half = vfsgnj(half, vf, VECTLENSP); - vfloat res = vfadd(vf, half, VECTLENSP); - return vfcvt_rtz_x(res, VECTLENSP); + half = __riscv_vfsgnj(half, vf, VECTLENSP); + vfloat res = __riscv_vfadd(vf, half, VECTLENSP); + return __riscv_vfcvt_rtz_x(res, VECTLENSP); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { - return vfcvt_rtz_x(vf, VECTLENSP); + return __riscv_vfcvt_rtz_x(vf, VECTLENSP); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return vcast_vf_vi2(vtruncate_vi2_vf(vf)); @@ -359,16 +359,16 @@ static INLINE vfloat vloadu_vf_p(const float *ptr) { return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { - vse32(ptr, v, VECTLENSP); + __riscv_vse32(ptr, v, VECTLENSP); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { - vse32(ptr, v, VECTLENSP); + __riscv_vse32(ptr, v, VECTLENSP); } static INLINE void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { - vse32(ptr, v, VECTLENSP); + __riscv_vse32(ptr, v, VECTLENSP); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { - return vluxei32(ptr, vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); + return __riscv_vluxei32(ptr, __riscv_vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); } @@ -376,58 +376,58 @@ static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { /* Floating-Point Arithmetic */ /****************************************/ static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { - return vfadd(x, y, VECTLENSP); + return __riscv_vfadd(x, y, VECTLENSP); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { - return vfsub(x, y, VECTLENSP); + return __riscv_vfsub(x, y, VECTLENSP); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { - return vfmul(x, y, VECTLENSP); + return __riscv_vfmul(x, y, VECTLENSP); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { - return vfdiv(x, y, VECTLENSP); + return __riscv_vfdiv(x, y, VECTLENSP); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { - return vfmax(x, y, VECTLENSP); + return __riscv_vfmax(x, y, VECTLENSP); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { - return vfmin(x, y, VECTLENSP); + return __riscv_vfmin(x, y, VECTLENSP); } static INLINE vfloat vrec_vf_vf(vfloat d) { - return vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); + return __riscv_vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { - return vfsqrt(d, VECTLENSP); + return __riscv_vfsqrt(d, VECTLENSP); } // fused multiply-add/subtract static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { - return vfmadd(x, y, z, VECTLENSP); + return __riscv_vfmadd(x, y, z, VECTLENSP); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { - return vfnmsub(x, y, z, VECTLENSP); + return __riscv_vfnmsub(x, y, z, VECTLENSP); } // sign manipulation static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { - return vfsgnjx(x, y, VECTLENSP); + return 
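/* RVV sign-injection maps one-to-one onto SLEEF's sign helpers:
 * vfsgnj(x, y) keeps x's magnitude and takes y's sign bit (copysign),
 * while vfsgnjx(x, y) XORs the two sign bits onto x's magnitude
 * (mulsign); for lanes holding x = -2.0f and y = -3.0f the result here
 * is +2.0f. */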
__riscv_vfsgnjx(x, y, VECTLENSP); } static INLINE vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { - return vfsgnj(x, y, VECTLENSP); + return __riscv_vfsgnj(x, y, VECTLENSP); } static INLINE vfloat vsign_vf_vf(vfloat f) { - return vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); + return __riscv_vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); } static INLINE vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { vint2 xi = SLEEF_RVV_SP_VREINTERPRET_VI2(x); vint2 yi = SLEEF_RVV_SP_VREINTERPRET_VI2(y); - vint2 xioryi = vor(xi, yi, VECTLENSP); + vint2 xioryi = __riscv_vor(xi, yi, VECTLENSP); vfloat xory = SLEEF_RVV_SP_VREINTERPRET_VF(xioryi); - return vfsgnj(x, xory, VECTLENSP); + return __riscv_vfsgnj(x, xory, VECTLENSP); } static INLINE vfloat vabs_vf_vf(vfloat f) { - return vfabs(f, VECTLENSP); + return __riscv_vfabs(f, VECTLENSP); } static INLINE vfloat vneg_vf_vf(vfloat f) { - return vfneg(f, VECTLENSP); + return __riscv_vfneg(f, VECTLENSP); } @@ -435,34 +435,34 @@ static INLINE vfloat vneg_vf_vf(vfloat f) { /* Integer Arithmetic and Logic */ /****************************************/ static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { - return vadd(x, y, VECTLENSP); + return __riscv_vadd(x, y, VECTLENSP); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { - return vsub(x, y, VECTLENSP); + return __riscv_vsub(x, y, VECTLENSP); } static INLINE vint2 vneg_vi2_vi2(vint2 x) { - return vneg(x, VECTLENSP); + return __riscv_vneg(x, VECTLENSP); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { - return vand(x, y, VECTLENSP); + return __riscv_vand(x, y, VECTLENSP); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { - return vand(vnot(x, VECTLENSP), y, VECTLENSP); + return __riscv_vand(__riscv_vnot(x, VECTLENSP), y, VECTLENSP); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { - return vor(x, y, VECTLENSP); + return __riscv_vor(x, y, VECTLENSP); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { - return vxor(x, y, VECTLENSP); + return __riscv_vxor(x, y, VECTLENSP); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { - return vsll(x, c, VECTLENSP); + return __riscv_vsll(x, c, VECTLENSP); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { - return vsra(x, c, VECTLENSP); + return __riscv_vsra(x, c, VECTLENSP); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { - return SLEEF_RVV_SP_VREINTERPRET_VI2(vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); + return SLEEF_RVV_SP_VREINTERPRET_VI2(__riscv_vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); } #ifdef ENABLE_RVV_SP @@ -470,37 +470,37 @@ static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { /* Bitmask Operations */ /****************************************/ static INLINE vfloat vreinterpret_vf_vm(vmask vm) { - return SLEEF_RVV_SP_VREINTERPRET_VF(vncvt_x(vm, VECTLENSP)); + return SLEEF_RVV_SP_VREINTERPRET_VF(__riscv_vncvt_x(vm, VECTLENSP)); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { - return vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); + return __riscv_vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); } static INLINE int vtestallones_i_vo32(vopmask g) { - return vcpop(g, VECTLENSP) == VECTLENSP; + return __riscv_vcpop(g, VECTLENSP) == VECTLENSP; } static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { return SLEEF_RVV_SP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENSP); } static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { - return vand(x, y, VECTLENSP); + return __riscv_vand(x, y, VECTLENSP); } static INLINE vmask 
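/* The single-precision vmask in this port holds 64 bits per lane, the
 * same layout as the double-precision mask, so the vfloat/vmask
 * reinterpretations above are a truncating narrow (__riscv_vncvt_x) and
 * a zero-extending widen (__riscv_vwcvtu_x) rather than plain
 * bit-casts. */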
vor_vm_vm_vm(vmask x, vmask y) { - return vor(x, y, VECTLENSP); + return __riscv_vor(x, y, VECTLENSP); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { - return vxor(x, y, VECTLENSP); + return __riscv_vxor(x, y, VECTLENSP); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { - return vand(SLEEF_RVV_SP_VREINTERPRET_VM(vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); + return __riscv_vand(SLEEF_RVV_SP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { - return vmerge(x, y, -1, VECTLENSP); + return __riscv_vmerge(y, -1, x, VECTLENSP); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { - return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { - return vmerge(x, y, 0, VECTLENSP); + return __riscv_vmerge(y, 0, x, VECTLENSP); } @@ -508,75 +508,75 @@ static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { /* Logical Mask Operations */ /****************************************/ static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { - return vmand(x, y, VECTLENSP); + return __riscv_vmand(x, y, VECTLENSP); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { - return vmandn(y, x, VECTLENSP); + return __riscv_vmandn(y, x, VECTLENSP); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { - return vmor(x, y, VECTLENSP); + return __riscv_vmor(x, y, VECTLENSP); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { - return vmxor(x, y, VECTLENSP); + return __riscv_vmxor(x, y, VECTLENSP); } // single precision FP comparison static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { - return vmfeq(x, y, VECTLENSP); + return __riscv_vmfeq(x, y, VECTLENSP); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { - return vmfne(x, y, VECTLENSP); + return __riscv_vmfne(x, y, VECTLENSP); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { - return vmfgt(x, y, VECTLENSP); + return __riscv_vmfgt(x, y, VECTLENSP); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { - return vmfge(x, y, VECTLENSP); + return __riscv_vmfge(x, y, VECTLENSP); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { - return vmflt(x, y, VECTLENSP); + return __riscv_vmflt(x, y, VECTLENSP); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { - return vmfle(x, y, VECTLENSP); + return __riscv_vmfle(x, y, VECTLENSP); } static INLINE vopmask visnan_vo_vf(vfloat d) { - return vmfne(d, d, VECTLENSP); + return __riscv_vmfne(d, d, VECTLENSP); } static INLINE vopmask visinf_vo_vf(vfloat d) { - return vmfeq(vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); + return __riscv_vmfeq(__riscv_vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); } static INLINE vopmask vispinf_vo_vf(vfloat d) { - return vmfeq(d, SLEEF_INFINITYf, VECTLENSP); + return __riscv_vmfeq(d, SLEEF_INFINITYf, VECTLENSP); } // conditional select static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { - return vmerge(mask, y, x, VECTLENSP); + return __riscv_vmerge(y, x, mask, VECTLENSP); } static INLINE vfloat vsel_vf_vo_f_f(vopmask mask, float v1, float v0) { - return vfmerge(mask, vcast_vf_f(v0), v1, VECTLENSP); + return __riscv_vfmerge(vcast_vf_f(v0), v1, mask, VECTLENSP); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { - return vfmerge(o0, vfmerge(o1, vcast_vf_f(d2), d1, VECTLENSP), d0, 
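/* In the updated API, __riscv_vmerge(y, x, m, vl) and
 * __riscv_vfmerge(dest, scalar, m, vl) select per lane as m ? x : y
 * (resp. m ? scalar : dest), so the nested selectors below read
 * inside-out: vsel_vf_vo_vo_f_f_f(o0, o1, d0, d1, d2) evaluates to
 * o0 ? d0 : (o1 ? d1 : d2) in each lane. */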
VECTLENSP); + return __riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d2), d1, o1, VECTLENSP), d0, o0, VECTLENSP); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { - return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vf_f(d3), d2, VECTLENSP), d1, VECTLENSP), d0, VECTLENSP); + return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d3), d2, o2, VECTLENSP), d1, o1, VECTLENSP), d0, o0, VECTLENSP); } // integer comparison static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { - return vmseq(x, y, VECTLENSP); + return __riscv_vmseq(x, y, VECTLENSP); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { - return vmsgt(x, y, VECTLENSP); + return __riscv_vmsgt(x, y, VECTLENSP); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 zero = vcast_vi2_i(0); - return vmerge(vmsgt(x, y, VECTLENSP), zero, -1, VECTLENSP); + return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENSP), VECTLENSP); } // integer conditional select static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { - return vmerge(m, y, x, VECTLENSP); + return __riscv_vmerge(y, x, m, VECTLENSP); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { - return vmerge(vmnot(x, VECTLENSP), y, 0, VECTLENSP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); } #endif // ENABLE_RVV_SP @@ -597,21 +597,21 @@ static INLINE const vdouble vd2gety_vd_vd2(vdouble2 v) { } static INLINE const vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { vdouble2 res; - res = vset(res, 0, x); - res = vset(res, 1, y); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); return res; } static INLINE const vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { - return vset(v, 0, d); + return __riscv_vset(v, 0, d); } static INLINE const vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { - return vset(v, 1, d); + return __riscv_vset(v, 1, d); } // dd2 type static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { dd2 res; - res = vset(res, 0, a); - res = vset(res, 1, b); + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); return res; } static vdouble2 dd2geta_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 0); } @@ -622,32 +622,32 @@ static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 2); } static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { vdouble3 res; - res = vset(res, 0, x); - res = vset(res, 1, y); - res = vset(res, 2, z); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + res = __riscv_vset(res, 2, z); return res; } -static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 0, d); } -static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 1, d); } -static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return vset(v, 2, d); } +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 2, d); } // di type static INLINE vdouble digetd_vd_di(di_t d) { return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); } static INLINE vint digeti_vi_di(di_t d) { #ifdef ENABLE_RVVM1 - return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); + return 
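/* In the RVVM1 configuration vint is an i32mf2 vector while a slot of
 * the wider register group is a full m1 register, so the integer part
 * of di_t is widened with __riscv_vlmul_ext_i32m1 when stored and
 * narrowed back with __riscv_vlmul_trunc_i32mf2 when read; RVVM2 keeps
 * vint at m1 and indexes its slot directly. */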
__riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); #else return SLEEF_RVV_DP_VGET_VI(d, 2); #endif } static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { di_t res; - res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); + res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); #ifdef ENABLE_RVVM1 - res = vset(res, 1, vlmul_ext_i32m1(i)); + res = __riscv_vset(res, 1, __riscv_vlmul_ext_i32m1(i)); #else - res = vset(res, 2, i); + res = __riscv_vset(res, 2, i); #endif return res; } @@ -657,23 +657,23 @@ static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { } static INLINE vint ddigeti_vi_ddi(ddi_t d) { #ifdef ENABLE_RVVM1 - return vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); + return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); #else return SLEEF_RVV_DP_VGET_VI(d, 4); #endif } static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { ddi_t res; - res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); + res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); #ifdef ENABLE_RVVM1 - res = vset(res, 2, vlmul_ext_i32m1(i)); + res = __riscv_vset(res, 2, __riscv_vlmul_ext_i32m1(i)); #else - res = vset(res, 4, i); + res = __riscv_vset(res, 4, i); #endif return res; } static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { - return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); + return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); } /****************************************/ @@ -692,20 +692,20 @@ static INLINE vint vrint_vi_vd(vdouble vd) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); - half = vfsgnj(half, vd, VECTLENDP); - vdouble res = vfadd(vd, half, VECTLENDP); - return vfncvt_rtz_x(res, VECTLENDP); + half = __riscv_vfsgnj(half, vd, VECTLENDP); + vdouble res = __riscv_vfadd(vd, half, VECTLENDP); + return __riscv_vfncvt_rtz_x(res, VECTLENDP); } static INLINE vdouble vrint_vd_vd(vdouble vd) { // It is not currently possible to safely set frm for intrinsics, // so emulate round-to-nearest behavior vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP); - half = vfsgnj(half, vd, VECTLENDP); - vdouble res = vfadd(vd, half, VECTLENDP); - return vfwcvt_f(vfncvt_rtz_x(res, VECTLENDP), VECTLENDP); + half = __riscv_vfsgnj(half, vd, VECTLENDP); + vdouble res = __riscv_vfadd(vd, half, VECTLENDP); + return __riscv_vfwcvt_f(__riscv_vfncvt_rtz_x(res, VECTLENDP), VECTLENDP); } static INLINE vint vtruncate_vi_vd(vdouble vd) { - return vfncvt_rtz_x(vd, VECTLENDP); + return __riscv_vfncvt_rtz_x(vd, VECTLENDP); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); @@ -725,16 +725,16 @@ static INLINE vint vloadu_vi_p(int32_t *p) { return SLEEF_RVV_DP_LOAD_VI(p, VECTLENDP); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { - vse64(ptr, v, VECTLENDP); + __riscv_vse64(ptr, v, VECTLENDP); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { - vse64(ptr, v, VECTLENDP); + __riscv_vse64(ptr, v, VECTLENDP); } static INLINE void vstoreu_v_p_vi(int32_t *ptr, vint v) { - vse32(ptr, v, VECTLENDP); + __riscv_vse32(ptr, v, VECTLENDP); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { - return vluxei64(ptr, vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), 
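/* Note on the gather below: vluxei64 takes unsigned byte offsets rather
 * than element indices, so the 32-bit indices are zero-extended and
 * scaled by sizeof(double) in a single widening multiply
 * (__riscv_vwmulu) before the indexed load. */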
sizeof(double), VECTLENDP), VECTLENDP); + return __riscv_vluxei64(ptr, __riscv_vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), sizeof(double), VECTLENDP), VECTLENDP); } @@ -742,60 +742,60 @@ static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { /* Floating-Point Arithmetic */ /****************************************/ static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { - return vfadd(x, y, VECTLENDP); + return __riscv_vfadd(x, y, VECTLENDP); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { - return vfsub(x, y, VECTLENDP); + return __riscv_vfsub(x, y, VECTLENDP); } static INLINE vdouble vrec_vd_vd(vdouble d) { - return vfdiv(vcast_vd_d(1.0), d, VECTLENDP); + return __riscv_vfdiv(vcast_vd_d(1.0), d, VECTLENDP); } static INLINE vdouble vabs_vd_vd(vdouble d) { - return vfabs(d, VECTLENDP); + return __riscv_vfabs(d, VECTLENDP); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { - return vfsqrt(d, VECTLENDP); + return __riscv_vfsqrt(d, VECTLENDP); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { - return vfmul(x, y, VECTLENDP); + return __riscv_vfmul(x, y, VECTLENDP); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { - return vfdiv(x, y, VECTLENDP); + return __riscv_vfdiv(x, y, VECTLENDP); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { - return vfmax(x, y, VECTLENDP); + return __riscv_vfmax(x, y, VECTLENDP); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { - return vfmin(x, y, VECTLENDP); + return __riscv_vfmin(x, y, VECTLENDP); } // fused multiply add / sub static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmadd(x, y, z, VECTLENDP); + return __riscv_vfmadd(x, y, z, VECTLENDP); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmsub(x, y, z, VECTLENDP); + return __riscv_vfmsub(x, y, z, VECTLENDP); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmadd(x, y, z, VECTLENDP); + return __riscv_vfmadd(x, y, z, VECTLENDP); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfnmsub(x, y, z, VECTLENDP); + return __riscv_vfnmsub(x, y, z, VECTLENDP); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { - return vfmsub(x, y, z, VECTLENDP); + return __riscv_vfmsub(x, y, z, VECTLENDP); } // sign manipulation static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { - return vfsgnjx(x, y, VECTLENDP); + return __riscv_vfsgnjx(x, y, VECTLENDP); } static INLINE vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { - return vfsgnj(x, y, VECTLENDP); + return __riscv_vfsgnj(x, y, VECTLENDP); } static INLINE vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { - return vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); + return __riscv_vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(__riscv_vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); } static INLINE vdouble vneg_vd_vd(vdouble d) { - return vfneg(d, VECTLENDP); + return __riscv_vfneg(d, VECTLENDP); } @@ -803,34 +803,34 @@ static INLINE vdouble vneg_vd_vd(vdouble d) { /* Integer Arithmetic and Logic */ /****************************************/ static INLINE vint vadd_vi_vi_vi(vint x, vint y) { - return vadd(x, y, VECTLENDP); + return __riscv_vadd(x, y, VECTLENDP); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { - return vsub(x, y, VECTLENDP); + return __riscv_vsub(x, y, VECTLENDP); } static INLINE 
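/* The three ternary RVV forms used for the fused operations above each
 * compute with a single rounding and cover every SLEEF flavour:
 *   __riscv_vfmadd(x, y, z, vl)  = x*y + z   (vmla, vfma)
 *   __riscv_vfmsub(x, y, z, vl)  = x*y - z   (vmlapn, vfmapn)
 *   __riscv_vfnmsub(x, y, z, vl) = z - x*y   (vfmanp) */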
vint vneg_vi_vi(vint x) { - return vneg(x, VECTLENDP); + return __riscv_vneg(x, VECTLENDP); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { - return vand(x, y, VECTLENDP); + return __riscv_vand(x, y, VECTLENDP); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { - return vand(vnot(x, VECTLENDP), y, VECTLENDP); + return __riscv_vand(__riscv_vnot(x, VECTLENDP), y, VECTLENDP); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { - return vor(x, y, VECTLENDP); + return __riscv_vor(x, y, VECTLENDP); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { - return vxor(x, y, VECTLENDP); + return __riscv_vxor(x, y, VECTLENDP); } static INLINE vint vsll_vi_vi_i(vint x, int c) { - return vsll(x, c, VECTLENDP); + return __riscv_vsll(x, c, VECTLENDP); } static INLINE vint vsra_vi_vi_i(vint x, int c) { - return vsra(x, c, VECTLENDP); + return __riscv_vsra(x, c, VECTLENDP); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { - return SLEEF_RVV_DP_VREINTERPRET_VI(vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); } @@ -848,55 +848,55 @@ static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { return SLEEF_RVV_DP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENDP); } static INLINE vmask vcast_vm_vi(vint vi) { - return SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP)); } static INLINE vmask vcastu_vm_vi(vint vi) { - return vsll(SLEEF_RVV_DP_VREINTERPRET_VM(vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP); + return __riscv_vsll(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP); } static INLINE vint vcastu_vi_vm(vmask vm) { - return SLEEF_RVV_DP_VREINTERPRET_VI(vnsrl(vm, 32, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vnsrl(vm, 32, VECTLENDP)); } static INLINE vint vcast_vi_vm(vmask vm) { - return SLEEF_RVV_DP_VREINTERPRET_VI(vncvt_x(vm, VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vncvt_x(vm, VECTLENDP)); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { - return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP); } static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { - return vand(x, y, VECTLENDP); + return __riscv_vand(x, y, VECTLENDP); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { - return vor(x, y, VECTLENDP); + return __riscv_vor(x, y, VECTLENDP); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { - return vxor(x, y, VECTLENDP); + return __riscv_vxor(x, y, VECTLENDP); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { - return vand(SLEEF_RVV_DP_VREINTERPRET_VM(vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP); + return __riscv_vand(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { - return vmerge(x, y, 0, VECTLENDP); + return __riscv_vmerge(y, 0, x, VECTLENDP); } static INLINE vmask vsll64_vm_vm_i(vmask mask, int64_t c) { - return vsll(mask, c, VECTLENDP); + return __riscv_vsll(mask, c, VECTLENDP); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { - return SLEEF_RVV_DP_VREINTERPRET_VM(vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP)); } static INLINE vmask 
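/* vcastu_vm_vi and vcastu_vi_vm above park a 32-bit lane in the upper
 * half of a 64-bit mask lane: widening 0x12345678 yields
 * 0x0000000012345678, the shift left by 32 moves it to
 * 0x1234567800000000, and the narrowing right shift by 32
 * (__riscv_vnsrl) recovers 0x12345678. */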
vsrl64_vm_vm_i(vmask mask, int64_t c) { - return vsrl(mask, c, VECTLENDP); + return __riscv_vsrl(mask, c, VECTLENDP); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { - return vadd(x, y, VECTLENDP); + return __riscv_vadd(x, y, VECTLENDP); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { - return vmerge(x, y, -1, VECTLENDP); + return __riscv_vmerge(y, -1, x, VECTLENDP); } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask mask, vmask x, vmask y) { - return vmerge(mask, y, x, VECTLENDP); + return __riscv_vmerge(y, x, mask, VECTLENDP); } static INLINE vmask vneg64_vm_vm(vmask mask) { - return SLEEF_RVV_DP_VREINTERPRET_VM(vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP)); + return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP)); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return SLEEF_RVV_DP_VREINTERPRET_VD(vm); @@ -910,12 +910,12 @@ static INLINE const vmask vqgetx_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, static INLINE const vmask vqgety_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 1); } static INLINE vquad vqsetxy_vq_vm_vm(vmask x, vmask y) { vquad res; - res = vset(res, 0, x); - res = vset(res, 1, y); + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); return res; } -static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return vset(v, 0, x); } -static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return vset(v, 1, y); } +static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return __riscv_vset(v, 0, x); } +static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return __riscv_vset(v, 1, y); } @@ -929,87 +929,87 @@ static INLINE vopmask vcast_vo32_vo64(vopmask vo) { return vo; } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { - return vmand(x, y, VECTLENDP); + return __riscv_vmand(x, y, VECTLENDP); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { - return vmandn(y, x, VECTLENDP); + return __riscv_vmandn(y, x, VECTLENDP); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { - return vmor(x, y, VECTLENDP); + return __riscv_vmor(x, y, VECTLENDP); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { - return vmxor(x, y, VECTLENDP); + return __riscv_vmxor(x, y, VECTLENDP); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { - return vmseq(x, y, VECTLENDP); + return __riscv_vmseq(x, y, VECTLENDP); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { - return vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP); + return __riscv_vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP); } // double-precision comparison static INLINE vopmask visinf_vo_vd(vdouble d) { - return vmfeq(vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP); + return __riscv_vmfeq(__riscv_vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP); } static INLINE vopmask vispinf_vo_vd(vdouble d) { - return vmfeq(d, SLEEF_INFINITY, VECTLENDP); + return __riscv_vmfeq(d, SLEEF_INFINITY, VECTLENDP); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { - return vmfeq(x, y, VECTLENDP); + return __riscv_vmfeq(x, y, VECTLENDP); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { - return vmfne(x, y, VECTLENDP); + return __riscv_vmfne(x, y, VECTLENDP); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { - return vmflt(x, y, VECTLENDP); + return __riscv_vmflt(x, y, VECTLENDP); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { - return vmfle(x, y, VECTLENDP); + return __riscv_vmfle(x, y, 
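/* The floating-point classification tests in this section rely on
 * IEEE-754 identities instead of a classify instruction: x != x holds
 * exactly for NaN (vmfne(d, d) in visnan), and |x| == +inf holds
 * exactly for the two infinities (vmfeq(vfabs(d), SLEEF_INFINITY) in
 * visinf). */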
VECTLENDP); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { - return vmfgt(x, y, VECTLENDP); + return __riscv_vmfgt(x, y, VECTLENDP); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { - return vmfge(x, y, VECTLENDP); + return __riscv_vmfge(x, y, VECTLENDP); } static INLINE vopmask visnan_vo_vd(vdouble d) { - return vmfne(d, d, VECTLENDP); + return __riscv_vmfne(d, d, VECTLENDP); } // double-precision conditional select static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { - return vmerge(mask, y, x, VECTLENDP); + return __riscv_vmerge(y, x, mask, VECTLENDP); } static INLINE vdouble vsel_vd_vo_d_d(vopmask mask, double v0, double v1) { - return vfmerge(mask, vcast_vd_d(v1), v0, VECTLENDP); + return __riscv_vfmerge(vcast_vd_d(v1), v0, mask, VECTLENDP); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { - return vfmerge(o0, vfmerge(o1, vcast_vd_d(d2), d1, VECTLENDP), d0, VECTLENDP); + return __riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d2), d1, o1, VECTLENDP), d0, o0, VECTLENDP); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { - return vfmerge(o0, vfmerge(o1, vfmerge(o2, vcast_vd_d(d3), d2, VECTLENDP), d1, VECTLENDP), d0, VECTLENDP); + return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d3), d2, o2, VECTLENDP), d1, o1, VECTLENDP), d0, o0, VECTLENDP); } static INLINE int vtestallones_i_vo64(vopmask g) { - return vcpop(g, VECTLENDP) == VECTLENDP; + return __riscv_vcpop(g, VECTLENDP) == VECTLENDP; } // integer comparison static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { - return vmseq(x, y, VECTLENDP); + return __riscv_vmseq(x, y, VECTLENDP); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { - return vmsgt(x, y, VECTLENDP); + return __riscv_vmsgt(x, y, VECTLENDP); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { vint zero = vcast_vi_i(0); - return vmerge(vmsgt(x, y, VECTLENDP), zero, -1, VECTLENDP); + return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENDP), VECTLENDP); } // integer conditional select static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { - return vmerge(m, y, x, VECTLENDP); + return __riscv_vmerge(y, x, m, VECTLENDP); } static INLINE vint vandnot_vi_vo_vi(vopmask mask, vint vi) { - return vmerge(mask, vi, 0, VECTLENDP); + return __riscv_vmerge(vi, 0, mask, VECTLENDP); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { - return vmerge(vmnot(x, VECTLENDP), y, 0, VECTLENDP); + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP); } #endif // ENABLE_RVV_DP From 55fd4530f756bdad9971212634952e4c3f07663d Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 4 Nov 2023 11:06:45 +0000 Subject: [PATCH 04/24] Add riscv64 CI on GitHub Actions --- .github/workflows/build_and_test.yml | 36 ++++++++++++++++++++++++-- Configure.cmake | 38 ++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3540b600..158a091d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -33,8 +33,8 @@ jobs: run: sudo apt-get update -y -qq && sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev - name: Build native + shell: bash -ex -o pipefail {0} run: | - set -x EXTRA_CMAKE_FLAGS="-DENFORCE_SSE2=ON -DENFORCE_SSE4=ON -DENFORCE_AVX=ON -DENFORCE_AVX=ON -DENFORCE_AVX2=ON 
-DENFORCE_AVX512F=ON -DENFORCE_FMA4=ON" cmake -S . -B _build-native -GNinja \ -DCMAKE_INSTALL_PREFIX=$(pwd)/_install-native \ @@ -108,6 +108,8 @@ jobs: package: -powerpc64le-linux-gnu # IBM Z - arch: s390x + # RISC-V + - arch: riscv64 name: build-${{ matrix.arch }} steps: @@ -120,6 +122,14 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.package || format('-{0}-linux-gnu', matrix.arch) }} + - name: Download riscv-gnu-toolchain's LLVM build + env: + RISCV_GNU_TOOLCHAIN_TAG: "2023.10.18" + run: | + curl -L https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-llvm-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | + tar xzf - -C /opt + if: ${{ matrix.arch == 'riscv64' }} + - name: Download build-native artifacts uses: actions/download-artifact@v3 with: @@ -130,8 +140,13 @@ jobs: chmod +x _build-native/bin/* - name: Build ${{ matrix.arch }} + shell: bash -ex -o pipefail {0} run: | - set -x + # Add riscv-gnu-toolchain to PATH + if [[ ${{ matrix.arch }} = "riscv64" ]]; then + export PATH="/opt/riscv/bin:$PATH" + fi + EXTRA_CMAKE_FLAGS="" if [[ ${{ matrix.arch }} = "aarch64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_SVE=ON" @@ -144,6 +159,14 @@ jobs: EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON" # Disable VXE2 support, QEMU doesn't support it EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" + elif [[ ${{ matrix.arch }} = "riscv64" ]]; then + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON" + # Disable inline headers, they just don't compile on riscv64 + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_INLINE_HEADERS=OFF" + # Disable dft, it fails with linker error to `cexp` + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_DFT=OFF" + # Disable quad, it's missing the `Sleef_quad` function + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_QUAD=OFF" fi cmake -S . 
-B _build-${{ matrix.arch }} -GNinja \
           -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \
           -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}.cmake \
           -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \
           ${COMMON_CMAKE_FLAGS} \
           ${EXTRA_CMAKE_FLAGS}
         cmake --build _build-${{ matrix.arch }}
         cmake --install _build-${{ matrix.arch }}

@@ -190,6 +213,15 @@ jobs:
           # IBM Z
           # TODO: figure out qemu_cpu variable to make tests pass on QEMU
           - arch: s390x
+          # RISC-V
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false"
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0"
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0"
+          - arch: riscv64
+            qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0"

     name: "test-${{ matrix.arch }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")"
     steps:
diff --git a/Configure.cmake b/Configure.cmake
index 25392d09..6cb9945f 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -121,8 +121,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector")
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
   set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.")
-  set(COMPILER_SUPPORTS_RVVM1 1)
-  set(COMPILER_SUPPORTS_RVVM2 1)
 endif()

 set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
@@ -623,6 +621,42 @@ if (ENFORCE_VXE2 AND NOT COMPILER_SUPPORTS_VXE2)
   message(FATAL_ERROR "ENFORCE_VXE2 is specified and that feature is disabled or not supported by the compiler")
 endif()

+# RVVM1
+
+option(DISABLE_RVVM1 "Disable RVVM1" OFF)
+option(ENFORCE_RVVM1 "Build fails if RVVM1 is not supported by the compiler" OFF)
+
+if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1)
+  string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM1}")
+  CHECK_C_SOURCE_COMPILES("
+  #include <riscv_vector.h>
+  int main() {
+    vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_v_min_vlen / 32); }"
+  COMPILER_SUPPORTS_RVVM1)
+endif()
+
+if (ENFORCE_RVVM1 AND NOT COMPILER_SUPPORTS_RVVM1)
+  message(FATAL_ERROR "ENFORCE_RVVM1 is specified and that feature is disabled or not supported by the compiler")
+endif()
+
+# RVVM2
+
+option(DISABLE_RVVM2 "Disable RVVM2" OFF)
+option(ENFORCE_RVVM2 "Build fails if RVVM2 is not supported by the compiler" OFF)
+
+if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2)
+  string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM2}")
+  CHECK_C_SOURCE_COMPILES("
+  #include <riscv_vector.h>
+  int main() {
+    vint32m2_t r = __riscv_vmv_v_x_i32m2(1, __riscv_v_min_vlen / 32); }"
+  COMPILER_SUPPORTS_RVVM2)
+endif()
+
+if (ENFORCE_RVVM2 AND NOT COMPILER_SUPPORTS_RVVM2)
+  message(FATAL_ERROR "ENFORCE_RVVM2 is specified and that feature is disabled or not supported by the compiler")
+endif()
+
 # CUDA

 option(ENFORCE_CUDA "Build fails if CUDA is not supported" OFF)
From 8339480189e0504a65862c0e48a92156abe53d6c Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Mon, 13 Nov 2023 11:11:30 +0000
Subject: [PATCH 05/24] Add gcc and llvm builds

---
 .github/workflows/build_and_test.yml | 181 +++++++++++++-----
 travis/before_script.aarch64-gcc.sh | 2 +-
 travis/before_script.armhf-gcc.sh | 2 +-
 ...ch64.cmake => toolchain-aarch64-gcc.cmake} | 0
 travis/toolchain-aarch64-llvm.cmake | 12 ++
 ...-armhf.cmake => toolchain-armhf-gcc.cmake} | 0
 travis/toolchain-armhf-llvm.cmake | 12 ++
 travis/toolchain-native-gcc.cmake | 1 +
 travis/toolchain-native-llvm.cmake | 1 +
 ...64el.cmake => toolchain-ppc64el-gcc.cmake} | 0
 travis/toolchain-ppc64el-llvm.cmake | 14 ++
 ...cv64.cmake => toolchain-riscv64-gcc.cmake} | 2 +-
 travis/toolchain-riscv64-llvm.cmake | 12 ++
 ...-s390x.cmake => toolchain-s390x-gcc.cmake} | 0
 travis/toolchain-s390x-llvm.cmake | 12 ++
 15 files changed, 204 insertions(+), 47 
deletions(-) rename travis/{toolchain-aarch64.cmake => toolchain-aarch64-gcc.cmake} (100%) create mode 100644 travis/toolchain-aarch64-llvm.cmake rename travis/{toolchain-armhf.cmake => toolchain-armhf-gcc.cmake} (100%) create mode 100644 travis/toolchain-armhf-llvm.cmake create mode 100644 travis/toolchain-native-gcc.cmake create mode 100644 travis/toolchain-native-llvm.cmake rename travis/{toolchain-ppc64el.cmake => toolchain-ppc64el-gcc.cmake} (100%) create mode 100644 travis/toolchain-ppc64el-llvm.cmake rename travis/{toolchain-riscv64.cmake => toolchain-riscv64-gcc.cmake} (79%) create mode 100644 travis/toolchain-riscv64-llvm.cmake rename travis/{toolchain-s390x.cmake => toolchain-s390x-gcc.cmake} (100%) create mode 100644 travis/toolchain-s390x-llvm.cmake diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 158a091d..6f611be0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -7,6 +7,10 @@ on: push: pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read @@ -24,13 +28,21 @@ env: jobs: build-native: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + compiler: [gcc, llvm] + + name: build-native-${{ matrix.compiler }} steps: - uses: actions/checkout@v4.1.1 with: persist-credentials: false - name: Install dependencies - run: sudo apt-get update -y -qq && sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev + run: | + sudo apt-get update -y -qq + sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev - name: Build native shell: bash -ex -o pipefail {0} @@ -38,15 +50,16 @@ jobs: EXTRA_CMAKE_FLAGS="-DENFORCE_SSE2=ON -DENFORCE_SSE4=ON -DENFORCE_AVX=ON -DENFORCE_AVX=ON -DENFORCE_AVX2=ON -DENFORCE_AVX512F=ON -DENFORCE_FMA4=ON" cmake -S . 
-B _build-native -GNinja \ -DCMAKE_INSTALL_PREFIX=$(pwd)/_install-native \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-native-${{ matrix.compiler }}.cmake \ ${COMMON_CMAKE_FLAGS} \ ${EXTRA_CMAKE_FLAGS} cmake --build _build-native cmake --install _build-native - - name: Upload build-native artifacts + - name: Upload build-native-${{ matrix.compiler }} artifacts uses: actions/upload-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} path: | _build-* _install-* @@ -55,6 +68,12 @@ jobs: test-native: runs-on: ubuntu-latest needs: [build-native] + strategy: + fail-fast: false + matrix: + compiler: [gcc, llvm] + + name: test-native-${{ matrix.compiler }} steps: - uses: actions/checkout@v4.1.1 with: @@ -67,12 +86,12 @@ jobs: run: | cat /proc/cpuinfo - - name: Download build-native artifacts + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Fix build-native permissions + - name: Fix _build-native permissions run: | chmod +x _build-native/bin/* @@ -97,21 +116,40 @@ jobs: strategy: fail-fast: false matrix: + compiler: [gcc, llvm] + arch: [aarch64, armhf, ppc64el, s390x, riscv64] include: - # AArch64 - - arch: aarch64 - # Aarch32 - arch: armhf - package: -arm-linux-gnueabihf - # PPC64 + gnupkg: -arm-linux-gnueabihf - arch: ppc64el - package: -powerpc64le-linux-gnu - # IBM Z + gnupkg: -powerpc64le-linux-gnu + exclude: + # It fails with the following error: + # ``` + # FAILED: include/sleefinline_vsx3.h /home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h + # cd /home/runner/work/sleef/sleef/_build-ppc64el/src/libm && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -n -e "/^\\/\\/@#.*\$/p" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 && /usr/bin/sed -e "s/^\\/\\/@#/#/g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/macroonlyVSX3.h && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimdsp.c >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c && /usr/bin/clang -E /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 && /usr/bin/sed -e s/SLEEF_VERSION_SLEEF/3.6.0/g -e s/SLEEF_SIMD_SLEEF/VSX3/g /home/runner/work/sleef/sleef/src/libm/sleefinline_header.h.org > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^SLEEFSHARP/#/g" 
/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 && /usr/bin/sed -e s/SLEEFXXX//g /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 && /usr/bin/sed -e "s/^[[:space:]]*\$//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 && /usr/bin/sed "/^\$/N;/^\\n\$/D" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 && /home/runner/work/sleef/sleef/_build-native/bin/addSuffix /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 /home/runner/work/sleef/sleef/src/common/keywords.txt _vsx3_sleef Sleef_rempitabdp Sleef_rempitabsp > /home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h + # In file included from /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c:178: + # /home/runner/work/sleef/sleef/src/arch/helperpower_128.h:9:2: error: Please specify -mcpu=power8 or -mcpu=power9 + # #error Please specify -mcpu=power8 or -mcpu=power9 + # ``` + - arch: ppc64el + compiler: llvm + # It fails with the following error: + # ``` + # FAILED: src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o + # /usr/bin/clang --target=s390x-linux-gnu -DENABLE_ALIAS=1 -DENABLE_PUREC_SCALAR=1 -DENABLE_SYS_getrandom=1 -DMACRO_ONLY_HEADER=\"macroonlyPUREC_SCALAR.h\" -DSIMD_SUFFIX=_purec_scalar_sleef -DSLEEF_STATIC_LIBS=1 -DUSE_INLINE_HEADER=\"sleefinline_purec_scalar.h\" -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-s390x/include -I/home/runner/work/sleef/sleef/src/libm -I/home/runner/work/sleef/sleef/_build-s390x/src/libm/include -Wall -Wno-unused-function -Wno-attributes -Wno-unused-result -ffp-contract=off -fno-math-errno -fno-trapping-math -fno-strict-aliasing -O3 -DNDEBUG -std=gnu99 -MD -MT src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -MF src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o.d -o src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -c /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c + # In file included from /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c:65: + # /usr/lib/llvm-13/lib/clang/13.0.1/include/vecintrin.h:11125:2: error: "Use -fzvector to enable vector extensions" + # #error "Use -fzvector to enable vector extensions" + # ``` - arch: s390x - # RISC-V + compiler: llvm + # Only GCC trunk supports the RISC-V V intrinsics and https://github.com/riscv-collab/riscv-gnu-toolchain + # doesn't track a recent enough version yet - arch: riscv64 + compiler: gcc - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} steps: - uses: actions/checkout@v4.1.1 with: @@ -120,33 +158,38 @@ jobs: - name: Install dependencies run: | sudo apt-get update -y -qq - sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.package || format('-{0}-linux-gnu', matrix.arch) }} + sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} - - name: Download riscv-gnu-toolchain's LLVM build - env: - RISCV_GNU_TOOLCHAIN_TAG: "2023.10.18" + - name: Install gcc run: | - curl -L 
https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-llvm-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | + RISCV_GNU_TOOLCHAIN_TAG="2023.11.08" + curl -L https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-gcc-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | tar xzf - -C /opt - if: ${{ matrix.arch == 'riscv64' }} + echo "PATH=/opt/riscv/bin:$PATH" >> $GITHUB_ENV + if: ${{ matrix.compiler == 'gcc' && matrix.arch == 'riscv64' }} - - name: Download build-native artifacts + - name: Install llvm + run: | + LLVM_VERSION="17" + curl -o llvm.sh https://apt.llvm.org/llvm.sh + chmod u+x llvm.sh + sudo ./llvm.sh ${LLVM_VERSION} + sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang + rm llvm.sh + if: ${{ matrix.compiler == 'llvm' }} + + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Fix build-native permissions + - name: Fix _build-native permissions run: | chmod +x _build-native/bin/* - name: Build ${{ matrix.arch }} shell: bash -ex -o pipefail {0} run: | - # Add riscv-gnu-toolchain to PATH - if [[ ${{ matrix.arch }} = "riscv64" ]]; then - export PATH="/opt/riscv/bin:$PATH" - fi - EXTRA_CMAKE_FLAGS="" if [[ ${{ matrix.arch }} = "aarch64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_SVE=ON" @@ -157,8 +200,12 @@ jobs: EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VSX=ON -DENFORCE_VSX3=ON" elif [[ ${{ matrix.arch }} = "s390x" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON" - # Disable VXE2 support, QEMU doesn't support it - EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" + if [[ ${{ matrix.compiler }} = "gcc" ]]; then + # Disable VXE2 support, QEMU doesn't support some instructions generated by gcc + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" + else + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE2=ON" + fi elif [[ ${{ matrix.arch }} = "riscv64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON" # Disable inline headers, they just don't compile on riscv64 @@ -168,19 +215,20 @@ jobs: # Disable quad, it's missing the `Sleef_quad` function EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_QUAD=OFF" fi + cmake -S . 
-B _build-${{ matrix.arch }} -GNinja \ -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \ - -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}.cmake \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}-${{ matrix.compiler }}.cmake \ -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \ ${COMMON_CMAKE_FLAGS} \ ${EXTRA_CMAKE_FLAGS} cmake --build _build-${{ matrix.arch }} cmake --install _build-${{ matrix.arch }} - - name: Upload build-${{ matrix.arch }} artifacts + - name: Upload build-${{ matrix.arch }}-${{ matrix.compiler }} artifacts uses: actions/upload-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} path: | _build-${{ matrix.arch }} _install-${{ matrix.arch }} @@ -195,35 +243,81 @@ jobs: include: # AArch64 - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=off" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve128=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve256=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve512=on" + # Some tests fail when compiled with LLVM only + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=off" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve128=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve256=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve512=on" # Aarch32 - arch: armhf + compiler: gcc + binfmt: arm + qemu_cpu: "max" + - arch: armhf + compiler: llvm binfmt: arm qemu_cpu: "max" # PPC64 - arch: ppc64el + compiler: gcc binfmt: ppc64le qemu_cpu: "power10" + # - arch: ppc64el + # compiler: llvm + # binfmt: ppc64le + # qemu_cpu: "power10" # IBM Z # TODO: figure out qemu_cpu variable to make tests pass on QEMU - arch: s390x + compiler: gcc + # - arch: s390x + # compiler: llvm # RISC-V + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" - arch: riscv64 + compiler: llvm qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" - name: "test-${{ matrix.arch }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" + name: "test-${{ matrix.arch }}-${{ matrix.compiler }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" steps: - uses: actions/checkout@v4.1.1 with: @@ -240,20 +334,19 @@ jobs: run: | cat /proc/cpuinfo - - name: Download build-native artifacts + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Download build-${{ matrix.arch }} artifacts + - name: Download build-${{ matrix.arch }}-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} - - name: Fix build-native and _build-${{ matrix.arch }} 
permissions + - name: Fix _build-native and _build-${{ matrix.arch }} permissions run: | - chmod +x _build-native/bin/* - chmod +x _build-${{ matrix.arch }}/bin/* + chmod +x _build-native/bin/* _build-${{ matrix.arch }}/bin/* - name: Test ${{ matrix.arch }} env: @@ -265,10 +358,10 @@ jobs: cd _build-${{ matrix.arch }} ctest -j$(nproc) - - name: Upload test-${{ matrix.arch }}-${{ strategy.job-index }} artifacts + - name: Upload test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} artifacts uses: actions/upload-artifact@v3 with: - name: test-${{ matrix.arch }}-${{ strategy.job-index }} + name: test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} path: | _build-${{ matrix.arch }}/Testing if: always() diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh index 56c4c88c..f590a9db 100644 --- a/travis/before_script.aarch64-gcc.sh +++ b/travis/before_script.aarch64-gcc.sh @@ -8,5 +8,5 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh index 8c4bd4fa..464fa581 100644 --- a/travis/before_script.armhf-gcc.sh +++ b/travis/before_script.armhf-gcc.sh @@ -8,4 +8,4 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE .. 
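Aside: the riscv64 test entries above pin QEMU's vector length (vlen=128, 256, and 512) so the RVVM1/RVVM2 builds are exercised at several hardware vector widths. A minimal standalone probe such as the following can confirm that a given QEMU configuration (or a real board) actually exposes the intended VLEN. This is an illustrative sketch, not part of the patch series; the file name vlen_probe.c is hypothetical, and it assumes a toolchain providing the __riscv_-prefixed RVV intrinsics, the same requirement the helper code in this series has.

/* vlen_probe.c: print the runtime RVV vector length.
 * Build: riscv64-linux-gnu-gcc -march=rv64gcv -static vlen_probe.c -o vlen_probe
 * Run:   qemu-riscv64 -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./vlen_probe
 */
#include <riscv_vector.h>
#include <stdio.h>

int main(void) {
  /* VLMAX at SEW=64, LMUL=1 is VLEN/64, i.e. elements per vector register */
  size_t vlmax = __riscv_vsetvlmax_e64m1();
  printf("VLMAX(e64, m1) = %zu -> VLEN = %zu bits\n", vlmax, vlmax * 64);
  return 0;
}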
diff --git a/travis/toolchain-aarch64.cmake b/travis/toolchain-aarch64-gcc.cmake similarity index 100% rename from travis/toolchain-aarch64.cmake rename to travis/toolchain-aarch64-gcc.cmake diff --git a/travis/toolchain-aarch64-llvm.cmake b/travis/toolchain-aarch64-llvm.cmake new file mode 100644 index 00000000..d9c11ae0 --- /dev/null +++ b/travis/toolchain-aarch64-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "aarch64") + +SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET aarch64-linux-gnu) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-armhf.cmake b/travis/toolchain-armhf-gcc.cmake similarity index 100% rename from travis/toolchain-armhf.cmake rename to travis/toolchain-armhf-gcc.cmake diff --git a/travis/toolchain-armhf-llvm.cmake b/travis/toolchain-armhf-llvm.cmake new file mode 100644 index 00000000..6c157289 --- /dev/null +++ b/travis/toolchain-armhf-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "armhf") + +SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-native-gcc.cmake b/travis/toolchain-native-gcc.cmake new file mode 100644 index 00000000..07ea294d --- /dev/null +++ b/travis/toolchain-native-gcc.cmake @@ -0,0 +1 @@ +find_program(CMAKE_C_COMPILER gcc) diff --git a/travis/toolchain-native-llvm.cmake b/travis/toolchain-native-llvm.cmake new file mode 100644 index 00000000..6f8e7121 --- /dev/null +++ b/travis/toolchain-native-llvm.cmake @@ -0,0 +1 @@ +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) diff --git a/travis/toolchain-ppc64el.cmake b/travis/toolchain-ppc64el-gcc.cmake similarity index 100% rename from travis/toolchain-ppc64el.cmake rename to travis/toolchain-ppc64el-gcc.cmake diff --git a/travis/toolchain-ppc64el-llvm.cmake b/travis/toolchain-ppc64el-llvm.cmake new file mode 100644 index 00000000..531b36f3 --- /dev/null +++ b/travis/toolchain-ppc64el-llvm.cmake @@ -0,0 +1,14 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "ppc64") + +SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu) + +SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-riscv64.cmake b/travis/toolchain-riscv64-gcc.cmake similarity index 79% rename from travis/toolchain-riscv64.cmake rename to travis/toolchain-riscv64-gcc.cmake index bb7b4977..fb7dc230 100644 --- a/travis/toolchain-riscv64.cmake +++ 
b/travis/toolchain-riscv64-gcc.cmake @@ -2,7 +2,7 @@ set(CMAKE_CROSSCOMPILING TRUE) set(CMAKE_SYSTEM_NAME "Linux") set(CMAKE_SYSTEM_PROCESSOR "riscv64") -find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-clang) +find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-riscv64-llvm.cmake b/travis/toolchain-riscv64-llvm.cmake new file mode 100644 index 00000000..1821770a --- /dev/null +++ b/travis/toolchain-riscv64-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "riscv64") + +SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang) +set(CMAKE_C_COMPILER_TARGET riscv64-linux-gnu) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-s390x.cmake b/travis/toolchain-s390x-gcc.cmake similarity index 100% rename from travis/toolchain-s390x.cmake rename to travis/toolchain-s390x-gcc.cmake diff --git a/travis/toolchain-s390x-llvm.cmake b/travis/toolchain-s390x-llvm.cmake new file mode 100644 index 00000000..ca5e9687 --- /dev/null +++ b/travis/toolchain-s390x-llvm.cmake @@ -0,0 +1,12 @@ +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "s390x") + +SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) + +find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang) +set(CMAKE_C_COMPILER_TARGET s390x-linux-gnu) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From 3b905a6a213ddf478e3066cb0725378b91ea6b7d Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Mon, 13 Nov 2023 13:41:49 +0000 Subject: [PATCH 06/24] Enable bitmanip extensions on RISC-V --- Configure.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Configure.cmake b/Configure.cmake index 6cb9945f..7ee52250 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -166,8 +166,8 @@ set(CLANG_FLAGS_ENABLE_VXENOFMA "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector") # RISC-V -set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv") -set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv") +set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv_zba_zbb_zbs") set(FLAGS_OTHERS "") From 798b14f6d135f67b45b8ae6cca9aa51c9887a543 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Mon, 13 Nov 2023 18:12:30 +0000 Subject: [PATCH 07/24] Enable LLVM build for s390x and ppc64el --- .github/workflows/build_and_test.yml | 40 ++++++---------------------- Configure.cmake | 1 + src/libm/CMakeLists.txt | 6 +++++ src/quad/CMakeLists.txt | 5 ++++ 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6f611be0..52fa8185 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -124,26 +124,6 @@ jobs: - arch: ppc64el gnupkg: -powerpc64le-linux-gnu exclude: - # It fails with the following error: - # ``` - # FAILED: include/sleefinline_vsx3.h 
/home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h - # cd /home/runner/work/sleef/sleef/_build-ppc64el/src/libm && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -n -e "/^\\/\\/@#.*\$/p" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 && /usr/bin/sed -e "s/^\\/\\/@#/#/g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp2 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/macroonlyVSX3.h && /usr/bin/clang -E -C -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-ppc64el/src/libm/include/ -DSLEEF_GENHEADER -DENABLE_VSX3 -DDORENAME /home/runner/work/sleef/sleef/src/libm/sleefsimdsp.c >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp1 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c && /usr/bin/clang -E /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.c > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 && /usr/bin/sed -e s/SLEEF_VERSION_SLEEF/3.6.0/g -e s/SLEEF_SIMD_SLEEF/VSX3/g /home/runner/work/sleef/sleef/src/libm/sleefinline_header.h.org > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^#.*//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp3 >> /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 && /usr/bin/sed -e "s/^SLEEFSHARP/#/g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp4 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 && /usr/bin/sed -e s/SLEEFXXX//g /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp5 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 && /usr/bin/sed -e "s/^[[:space:]]*\$//g" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp6 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 && /usr/bin/sed "/^\$/N;/^\\n\$/D" /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp7 > /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 && /home/runner/work/sleef/sleef/_build-native/bin/addSuffix /home/runner/work/sleef/sleef/_build-ppc64el/src/libm/sleefVSX3.h.tmp8 /home/runner/work/sleef/sleef/src/common/keywords.txt _vsx3_sleef Sleef_rempitabdp Sleef_rempitabsp > /home/runner/work/sleef/sleef/_build-ppc64el/include/sleefinline_vsx3.h - # In file included from /home/runner/work/sleef/sleef/src/libm/sleefsimddp.c:178: - # /home/runner/work/sleef/sleef/src/arch/helperpower_128.h:9:2: error: Please specify -mcpu=power8 or -mcpu=power9 - # #error Please specify -mcpu=power8 or -mcpu=power9 - # ``` - - arch: ppc64el - compiler: llvm - # It fails with the following error: - # ``` - # FAILED: src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o - # /usr/bin/clang --target=s390x-linux-gnu -DENABLE_ALIAS=1 -DENABLE_PUREC_SCALAR=1 -DENABLE_SYS_getrandom=1 -DMACRO_ONLY_HEADER=\"macroonlyPUREC_SCALAR.h\" 
-DSIMD_SUFFIX=_purec_scalar_sleef -DSLEEF_STATIC_LIBS=1 -DUSE_INLINE_HEADER=\"sleefinline_purec_scalar.h\" -I/home/runner/work/sleef/sleef/src/common -I/home/runner/work/sleef/sleef/src/arch -I/home/runner/work/sleef/sleef/_build-s390x/include -I/home/runner/work/sleef/sleef/src/libm -I/home/runner/work/sleef/sleef/_build-s390x/src/libm/include -Wall -Wno-unused-function -Wno-attributes -Wno-unused-result -ffp-contract=off -fno-math-errno -fno-trapping-math -fno-strict-aliasing -O3 -DNDEBUG -std=gnu99 -MD -MT src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -MF src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o.d -o src/libm-tester/CMakeFiles/iutipurec_scalar.dir/iutsimd.c.o -c /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c - # In file included from /home/runner/work/sleef/sleef/src/libm-tester/iutsimd.c:65: - # /usr/lib/llvm-13/lib/clang/13.0.1/include/vecintrin.h:11125:2: error: "Use -fzvector to enable vector extensions" - # #error "Use -fzvector to enable vector extensions" - # ``` - - arch: s390x - compiler: llvm # Only GCC trunk supports the RISC-V V intrinsics and https://github.com/riscv-collab/riscv-gnu-toolchain # doesn't track a recent enough version yet - arch: riscv64 @@ -200,12 +180,8 @@ jobs: EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VSX=ON -DENFORCE_VSX3=ON" elif [[ ${{ matrix.arch }} = "s390x" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON" - if [[ ${{ matrix.compiler }} = "gcc" ]]; then - # Disable VXE2 support, QEMU doesn't support some instructions generated by gcc - EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" - else - EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE2=ON" - fi + # Disable VXE2 support, QEMU doesn't support some instructions generated by gcc or llvm + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON" elif [[ ${{ matrix.arch }} = "riscv64" ]]; then EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON" # Disable inline headers, they just don't compile on riscv64 @@ -281,16 +257,16 @@ jobs: compiler: gcc binfmt: ppc64le qemu_cpu: "power10" - # - arch: ppc64el - # compiler: llvm - # binfmt: ppc64le - # qemu_cpu: "power10" + - arch: ppc64el + compiler: llvm + binfmt: ppc64le + qemu_cpu: "power10" # IBM Z # TODO: figure out qemu_cpu variable to make tests pass on QEMU - arch: s390x compiler: gcc - # - arch: s390x - # compiler: llvm + - arch: s390x + compiler: llvm # RISC-V # - arch: riscv64 # compiler: gcc diff --git a/Configure.cmake b/Configure.cmake index 7ee52250..1c4cb6fe 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -118,6 +118,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.") diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index 352383b1..73818bb0 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -489,12 +489,17 @@ if(BUILD_INLINE_HEADERS) if(COMPILER_SUPPORTS_${SIMD}) string(TOLOWER ${SIMD} SIMDLC) + if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET) + set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET}) + endif() + set(INLINE_HEADER_FILE 
${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h) add_custom_command( OUTPUT ${INLINE_HEADER_FILE} # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}} # -msse2 ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME @@ -510,6 +515,7 @@ if(BUILD_INLINE_HEADERS) # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. Include macroonly*.h instead of helper*.h. COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}} # -msse2 ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME diff --git a/src/quad/CMakeLists.txt b/src/quad/CMakeLists.txt index f37de951..5eae42da 100644 --- a/src/quad/CMakeLists.txt +++ b/src/quad/CMakeLists.txt @@ -234,12 +234,17 @@ if(BUILD_INLINE_HEADERS) if(COMPILER_SUPPORTS_${SIMD}) string(TOLOWER ${SIMD} SIMDLC) + if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET) + set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET}) + endif() + set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefquadinline_${SIMDLC}.h) add_custom_command( OUTPUT ${INLINE_HEADER_FILE} # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}} # -msse2 ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/quad/include ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME From 3bcec26039771e8af513ebb184b80292345a633d Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Mon, 13 Nov 2023 22:07:58 +0000 Subject: [PATCH 08/24] Nit: Reorder build-cross targets to group gcc/llvm per-arch together --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 52fa8185..e5bb74ec 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -116,8 +116,8 @@ jobs: strategy: fail-fast: false matrix: - compiler: [gcc, llvm] arch: [aarch64, armhf, ppc64el, s390x, riscv64] + compiler: [gcc, llvm] include: - arch: armhf gnupkg: -arm-linux-gnueabihf From 24278a0788b2405ddac8b2ed354cd82020692bdf Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 16:57:14 +0000 Subject: [PATCH 09/24] Remove unused riscv-gnu-toolchain installation step --- .github/workflows/build_and_test.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e5bb74ec..c04b4c0e 100644 --- 
a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -140,14 +140,6 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} - - name: Install gcc - run: | - RISCV_GNU_TOOLCHAIN_TAG="2023.11.08" - curl -L https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/${RISCV_GNU_TOOLCHAIN_TAG}/riscv64-glibc-ubuntu-20.04-gcc-nightly-${RISCV_GNU_TOOLCHAIN_TAG}-nightly.tar.gz | - tar xzf - -C /opt - echo "PATH=/opt/riscv/bin:$PATH" >> $GITHUB_ENV - if: ${{ matrix.compiler == 'gcc' && matrix.arch == 'riscv64' }} - - name: Install llvm run: | LLVM_VERSION="17" From 35be378d3d99fde39f586a620919fc1184c27f5b Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 18:11:23 +0000 Subject: [PATCH 10/24] Use same compiler versions across all builds --- .github/workflows/build_and_test.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c04b4c0e..5699c460 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -15,6 +15,8 @@ permissions: contents: read env: + GCC_VERSION: "12" + LLVM_VERSION: "17" COMMON_CMAKE_FLAGS: | -DSLEEF_SHOW_CONFIG=1 -DDISABLE_SSL=ON @@ -44,6 +46,20 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev + # Needed for llvm builds as well for target libraries + - name: Install gcc + run: | + sudo apt-get install -y -qq gcc-${GCC_VERSION} + + - name: Install llvm + run: | + curl -o llvm.sh https://apt.llvm.org/llvm.sh + chmod u+x llvm.sh + sudo ./llvm.sh ${LLVM_VERSION} + sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang + rm llvm.sh + if: ${{ matrix.compiler == 'llvm' }} + - name: Build native shell: bash -ex -o pipefail {0} run: | @@ -138,11 +154,15 @@ jobs: - name: Install dependencies run: | sudo apt-get update -y -qq - sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} + sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev + + # Needed for llvm builds as well for target libraries + - name: Install gcc + run: | + sudo apt-get install -y -qq gcc-${GCC_VERSION}${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }} - name: Install llvm run: | - LLVM_VERSION="17" curl -o llvm.sh https://apt.llvm.org/llvm.sh chmod u+x llvm.sh sudo ./llvm.sh ${LLVM_VERSION} From abd492ffac8c329f1217bd94fcd0cf63e0cb5adc Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 18:26:42 +0000 Subject: [PATCH 11/24] fixup! 
Use same compiler versions across all builds --- travis/toolchain-aarch64-gcc.cmake | 2 +- travis/toolchain-armhf-gcc.cmake | 2 +- travis/toolchain-ppc64el-gcc.cmake | 2 +- travis/toolchain-riscv64-gcc.cmake | 2 +- travis/toolchain-s390x-gcc.cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake index c73de216..7f82bf9a 100644 --- a/travis/toolchain-aarch64-gcc.cmake +++ b/travis/toolchain-aarch64-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) -find_program(CMAKE_C_COMPILER aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5) +find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake index ba233487..af3e1aed 100644 --- a/travis/toolchain-armhf-gcc.cmake +++ b/travis/toolchain-armhf-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) -find_program(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5) +find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-ppc64el-gcc.cmake b/travis/toolchain-ppc64el-gcc.cmake index e26a6eaa..19a5a3be 100644 --- a/travis/toolchain-ppc64el-gcc.cmake +++ b/travis/toolchain-ppc64el-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) -find_program(CMAKE_C_COMPILER powerpc64le-linux-gnu-gcc ppc64el-cc) +find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) diff --git a/travis/toolchain-riscv64-gcc.cmake b/travis/toolchain-riscv64-gcc.cmake index fb7dc230..d23c56d3 100644 --- a/travis/toolchain-riscv64-gcc.cmake +++ b/travis/toolchain-riscv64-gcc.cmake @@ -2,7 +2,7 @@ set(CMAKE_CROSSCOMPILING TRUE) set(CMAKE_SYSTEM_NAME "Linux") set(CMAKE_SYSTEM_PROCESSOR "riscv64") -find_program(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES riscv64-unknown-linux-gnu-gcc-12 riscv64-unknown-linux-gnu-gcc-11 riscv64-unknown-linux-gnu-gcc) set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-s390x-gcc.cmake b/travis/toolchain-s390x-gcc.cmake index a2d37bda..f7ba0cb0 100644 --- a/travis/toolchain-s390x-gcc.cmake +++ b/travis/toolchain-s390x-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) 
-find_program(CMAKE_C_COMPILER s390x-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES s390x-unknown-linux-gnu-gcc-12 s390x-unknown-linux-gnu-gcc-11 s390x-unknown-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) From b1176459d74c9740971eb18da9c3f1e6a41e9045 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 14 Nov 2023 23:16:51 +0000 Subject: [PATCH 12/24] fixup! Use same compiler versions across all builds --- travis/toolchain-aarch64-gcc.cmake | 2 +- travis/toolchain-armhf-gcc.cmake | 2 +- travis/toolchain-ppc64el-gcc.cmake | 2 +- travis/toolchain-riscv64-gcc.cmake | 16 +++++++++------- travis/toolchain-s390x-gcc.cmake | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake index 7f82bf9a..710622ea 100644 --- a/travis/toolchain-aarch64-gcc.cmake +++ b/travis/toolchain-aarch64-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake index af3e1aed..86576140 100644 --- a/travis/toolchain-armhf-gcc.cmake +++ b/travis/toolchain-armhf-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) -find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) +find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-ppc64el-gcc.cmake b/travis/toolchain-ppc64el-gcc.cmake index 19a5a3be..bec0de6f 100644 --- a/travis/toolchain-ppc64el-gcc.cmake +++ b/travis/toolchain-ppc64el-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc) +find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) diff --git a/travis/toolchain-riscv64-gcc.cmake b/travis/toolchain-riscv64-gcc.cmake index d23c56d3..b0840998 100644 --- a/travis/toolchain-riscv64-gcc.cmake +++ b/travis/toolchain-riscv64-gcc.cmake @@ -1,9 +1,11 @@ -set(CMAKE_CROSSCOMPILING TRUE) -set(CMAKE_SYSTEM_NAME "Linux") -set(CMAKE_SYSTEM_PROCESSOR "riscv64") +SET (CMAKE_CROSSCOMPILING TRUE) +SET (CMAKE_SYSTEM_NAME "Linux") +SET (CMAKE_SYSTEM_PROCESSOR "riscv64") 
-find_program(CMAKE_C_COMPILER NAMES riscv64-unknown-linux-gnu-gcc-12 riscv64-unknown-linux-gnu-gcc-11 riscv64-unknown-linux-gnu-gcc) +SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu) -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +find_program(CMAKE_C_COMPILER NAMES riscv64-linux-gnu-gcc-14 riscv64-linux-gnu-gcc) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/travis/toolchain-s390x-gcc.cmake b/travis/toolchain-s390x-gcc.cmake index f7ba0cb0..8f0ed581 100644 --- a/travis/toolchain-s390x-gcc.cmake +++ b/travis/toolchain-s390x-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES s390x-unknown-linux-gnu-gcc-12 s390x-unknown-linux-gnu-gcc-11 s390x-unknown-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-12 s390x-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) From 1c097ca5ba889de2c58dcbc9f9d2ded52d3c025a Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Wed, 15 Nov 2023 00:02:13 +0000 Subject: [PATCH 13/24] Downgrade version to GCC 11 It fails on aarch64, ppc64el, and s390x https://github.com/shibatch/sleef/actions/runs/6870688901\?pr\=477 --- .github/workflows/build_and_test.yml | 2 +- travis/toolchain-aarch64-gcc.cmake | 2 +- travis/toolchain-armhf-gcc.cmake | 2 +- travis/toolchain-ppc64el-gcc.cmake | 2 +- travis/toolchain-s390x-gcc.cmake | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5699c460..e5e518d8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -15,7 +15,7 @@ permissions: contents: read env: - GCC_VERSION: "12" + GCC_VERSION: "11" LLVM_VERSION: "17" COMMON_CMAKE_FLAGS: | -DSLEEF_SHOW_CONFIG=1 diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake index 710622ea..c3594551 100644 --- a/travis/toolchain-aarch64-gcc.cmake +++ b/travis/toolchain-aarch64-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-12 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake index 86576140..24e160b9 100644 --- a/travis/toolchain-armhf-gcc.cmake +++ b/travis/toolchain-armhf-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) -find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-12 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 
arm-linux-gnueabihf-gcc) +find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) diff --git a/travis/toolchain-ppc64el-gcc.cmake b/travis/toolchain-ppc64el-gcc.cmake index bec0de6f..7d6c96ae 100644 --- a/travis/toolchain-ppc64el-gcc.cmake +++ b/travis/toolchain-ppc64el-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-12 powerpc64le-linux-gnu-gcc ppc64el-cc) +find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) diff --git a/travis/toolchain-s390x-gcc.cmake b/travis/toolchain-s390x-gcc.cmake index 8f0ed581..4aa9f12c 100644 --- a/travis/toolchain-s390x-gcc.cmake +++ b/travis/toolchain-s390x-gcc.cmake @@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) -find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-12 s390x-linux-gnu-gcc) +find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-11 s390x-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) From 395a6f64bdedec5635afebe6cccee230c2290ce3 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Wed, 15 Nov 2023 14:47:06 +0000 Subject: [PATCH 14/24] Add rvvm*nofma configuration --- CMakeLists.txt | 4 +-- Configure.cmake | 10 ++++++++ src/arch/helperrvv.h | 44 ++++++++++++++++++++++----------- src/common/commonfuncs.h | 6 ++--- src/common/dd.h | 2 +- src/common/df.h | 2 +- src/libm-tester/iutsimd.c | 16 ++++++++++-- src/libm-tester/tester2simddp.c | 18 +++++++++++++- src/libm-tester/tester2simdsp.c | 18 +++++++++++++- src/libm/CMakeLists.txt | 6 +++++ src/libm/sleefsimddp.c | 26 +++++++++++++++++++ src/libm/sleefsimdsp.c | 34 ++++++++++++++++++++++--- 12 files changed, 156 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40dca676..298a2fee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ set(SLEEF_ALL_SUPPORTED_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z - RVVM1 RVVM2 # RISC-V Vectors + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -57,7 +57,7 @@ set(SLEEF_SUPPORTED_LIBM_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z - RVVM1 RVVM2 # RISC-V Vectors + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." 
) diff --git a/Configure.cmake b/Configure.cmake index 1c4cb6fe..ac64f16f 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -168,7 +168,9 @@ set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector") # RISC-V set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM1NOFMA "-march=rv64gcv_zba_zbb_zbs") set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2NOFMA "-march=rv64gcv_zba_zbb_zbs") set(FLAGS_OTHERS "") @@ -634,6 +636,10 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1) int main() { vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_v_min_vlen / 32); }" COMPILER_SUPPORTS_RVVM1) + + if(COMPILER_SUPPORTS_RVVM1) + set(COMPILER_SUPPORTS_RVVM1NOFMA 1) + endif() endif() if (ENFORCE_RVVM1 AND NOT COMPILER_SUPPORTS_RVVM1) @@ -652,6 +658,10 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2) int main() { vint32m2_t r = __riscv_vmv_v_x_i32m2(1, __riscv_v_min_vlen / 32); }" COMPILER_SUPPORTS_RVVM2) + + if(COMPILER_SUPPORTS_RVVM2) + set(COMPILER_SUPPORTS_RVVM2NOFMA 1) + endif() endif() if (ENFORCE_RVVM2 AND NOT COMPILER_SUPPORTS_RVVM2) diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h index 18fe7642..dbfd2adf 100644 --- a/src/arch/helperrvv.h +++ b/src/arch/helperrvv.h @@ -10,7 +10,7 @@ #endif #endif // #if !defined(SLEEF_GENHEADER) -#if CONFIG == 1 +#if CONFIG == 1 || CONFIG == 2 #define ISANAME "RISC-V Vector Extension with Min. VLEN" #define SLEEF_RVV_VLEN __riscv_v_min_vlen #else @@ -45,23 +45,27 @@ static INLINE int vavailability_i(int name) { return -1; } #ifdef ENABLE_RVV_SP // Types that conflict with ENABLE_RVV_DP -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) typedef vuint64m2_t vmask; typedef vbool32_t vopmask; -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) typedef vuint64m4_t vmask; typedef vbool16_t vopmask; +#else +#error "unknown rvv lmul" #endif #endif #ifdef ENABLE_RVV_DP // Types that conflict with ENABLE_RVV_SP -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) typedef vuint64m1_t vmask; typedef vbool64_t vopmask; -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) typedef vuint64m2_t vmask; typedef vbool32_t vopmask; +#else +#error "unknown rvv lmul" #endif #endif @@ -73,7 +77,7 @@ typedef vbool32_t vopmask; // wide-LMUL register group. In the largest cases (ddi_t and ddf_t), this // requires LMUL=8 if the base type (vfloat or vdouble) has LMUL=2, meaning // LMUL=2 is currently the widest option for SLEEF function argument types. 
-#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) typedef vint32mf2_t vint; typedef vfloat64m1_t vdouble; @@ -156,7 +160,7 @@ typedef vint32m4_t dfi_t; #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2 -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) typedef vint32m1_t vint; typedef vfloat64m2_t vdouble; @@ -239,6 +243,8 @@ typedef vint32m8_t dfi_t; #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1 +#else +#error "unknown rvv lmul" #endif // ENABLE_RVVM1 //////////////////////////////////////////////////////////////////////////////// @@ -635,19 +641,23 @@ static INLINE vdouble digetd_vd_di(di_t d) { return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); } static INLINE vint digeti_vi_di(di_t d) { -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) return SLEEF_RVV_DP_VGET_VI(d, 2); +#else +#error "unknown rvv lmul" #endif } static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { di_t res; res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) res = __riscv_vset(res, 1, __riscv_vlmul_ext_i32m1(i)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) res = __riscv_vset(res, 2, i); +#else +#error "unknown rvv lmul" #endif return res; } @@ -656,19 +666,23 @@ static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { return SLEEF_RVV_DP_VGET_VD2(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(d), 0); } static INLINE vint ddigeti_vi_ddi(ddi_t d) { -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) return SLEEF_RVV_DP_VGET_VI(d, 4); +#else +#error "unknown rvv lmul" #endif } static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { ddi_t res; res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); -#ifdef ENABLE_RVVM1 +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) res = __riscv_vset(res, 2, __riscv_vlmul_ext_i32m1(i)); -#else +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) res = __riscv_vset(res, 4, i); +#else +#error "unknown rvv lmul" #endif return res; } diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h index aff782df..274156fc 100644 --- a/src/common/commonfuncs.h +++ b/src/common/commonfuncs.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) typedef struct { vdouble x, y, z; } vdouble3; @@ -210,7 +210,7 @@ static INLINE CONST VECTOR_CC vdouble vtoward0_vd_vd(vdouble x) { // returns nex return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t); } -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { return 
vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } @@ -220,7 +220,7 @@ static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) { return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); } -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } diff --git a/src/common/dd.h b/src/common/dd.h index 89af2e87..3431e42d 100644 --- a/src/common/dd.h +++ b/src/common/dd.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) #if !defined(ENABLE_CUDA) typedef struct { vdouble x, y; diff --git a/src/common/df.h b/src/common/df.h index 0883b227..a14c1c6a 100644 --- a/src/common/df.h +++ b/src/common/df.h @@ -3,7 +3,7 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) #if !defined(ENABLE_CUDA) typedef struct { vfloat x, y; diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 90353586..03fcd743 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -349,12 +349,24 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #include "renamervvm1.h" #endif +#ifdef ENABLE_RVVM1NOFMA +#define CONFIG 2 +#include "helperrvv.h" +#include "renamervvm1nofma.h" +#endif + #ifdef ENABLE_RVVM2 #define CONFIG 1 #include "helperrvv.h" #include "renamervvm2.h" #endif +#ifdef ENABLE_RVVM2NOFMA +#define CONFIG 2 +#include "helperrvv.h" +#include "renamervvm2nofma.h" +#endif + #ifdef ENABLE_PUREC_SCALAR #include "renamepurec_scalar.h" #if !defined(USE_INLINE_HEADER) @@ -438,12 +450,12 @@ int check_feature(double d, float f) { return 0; } -#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif -#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2) || defined(USE_INLINE_HEADER)) +#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c index 9d723868..5071bb70 100644 --- a/src/libm-tester/tester2simddp.c +++ 
b/src/libm-tester/tester2simddp.c
@@ -199,6 +199,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_RVVM2
 #define CONFIG 1
 #define ENABLE_RVV_DP
@@ -207,6 +215,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -225,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
 static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c
index d83e8b4b..3fb1e619 100644
--- a/src/libm-tester/tester2simdsp.c
+++ b/src/libm-tester/tester2simdsp.c
@@ -199,6 +199,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_RVVM2
 #define CONFIG 1
 #define ENABLE_RVV_SP
@@ -207,6 +215,14 @@ typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #include "sleef.h"
 #endif
 
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -225,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; }
 static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
 #endif
diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt
index 73818bb0..35b12734 100644
--- a/src/libm/CMakeLists.txt
+++ b/src/libm/CMakeLists.txt
@@ -63,7 +63,9 @@ elseif(SLEEF_ARCH_S390X)
 elseif(SLEEF_ARCH_RISCV64)
   set(SLEEF_HEADER_LIST
     RVVM1
+    RVVM1NOFMA
     RVVM2
+    RVVM2NOFMA
     PUREC_SCALAR
     PURECFMA_SCALAR
   )
@@ -106,7 +108,9 @@ command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE"
 command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma)
 
 command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1)
+command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1nofma)
 command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2)
+command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2nofma)
 
 command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__)
 command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t
__STDC__ purec) @@ -155,7 +159,9 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE) command_arguments(RENAME_PARAMS_RVVM1 finz_ x x m1) +command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x m1nofma) command_arguments(RENAME_PARAMS_RVVM2 finz_ x x m2) +command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x m2nofma) # ALIAS_PARAMS diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index 68c13a34..6cdaa6ad 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -235,6 +235,19 @@ extern const double Sleef_rempitabdp[]; #endif #endif +#ifdef ENABLE_RVVM1NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_DP +#include "helperrvv.h" +#else +#include "macroonlyRVVM1NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm1nofma.h" +#endif +#endif /* ENABLE_RVVM1NOFMA */ + #ifdef ENABLE_RVVM2 #define CONFIG 1 #if !defined(SLEEF_GENHEADER) @@ -248,6 +261,19 @@ extern const double Sleef_rempitabdp[]; #endif #endif +#ifdef ENABLE_RVVM2NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_DP +#include "helperrvv.h" +#else +#include "macroonlyRVVM2NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm2nofma.h" +#endif +#endif /* ENABLE_RVVM2NOFMA */ + // Generic #ifdef ENABLE_VECEXT diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index 5ec5a082..c5dbc2a5 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -335,6 +335,19 @@ extern const float Sleef_rempitabsp[]; #endif #endif +#ifdef ENABLE_RVVM1NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_SP +#include "helperrvv.h" +#else +#include "macroonlyRVVM1NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm1nofma.h" +#endif +#endif + #ifdef ENABLE_RVVM2 #define CONFIG 1 #if !defined(SLEEF_GENHEADER) @@ -348,6 +361,19 @@ extern const float Sleef_rempitabsp[]; #endif #endif +#ifdef ENABLE_RVVM2NOFMA +#define CONFIG 2 +#if !defined(SLEEF_GENHEADER) +#define ENABLE_RVV_SP +#include "helperrvv.h" +#else +#include "macroonlyRVVM2NOFMA.h" +#endif +#ifdef DORENAME +#include "renamervvm2nofma.h" +#endif +#endif + // Generic #ifdef ENABLE_VECEXT @@ -428,7 +454,7 @@ static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) { return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f))); } -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); } @@ -516,7 +542,7 @@ static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) { EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); } -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) typedef struct { vfloat d; vint2 i; @@ -546,7 +572,7 @@ static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { } #endif -#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM2)) +#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)) static INLINE CONST VECTOR_CC vfloat 
From ce3f65a210a04bdb5cec31cd2f384a4e0078987d Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Wed, 15 Nov 2023 16:24:34 +0000
Subject: [PATCH 15/24] Fix CONFIG for RVV in helperrvv

---
 src/arch/helperrvv.h | 60 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index dbfd2adf..610c99f1 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -13,19 +13,34 @@
 #if CONFIG == 1 || CONFIG == 2
 #define ISANAME "RISC-V Vector Extension with Min. VLEN"
 #define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#elif CONFIG == 8
+// 256-bit vector length
+#define ISANAME "RISC-V Vector Extension 256-bit"
+#define SLEEF_RVV_VLEN (1 << 8)
+#elif CONFIG == 9
+// 512-bit vector length
+#define ISANAME "RISC-V Vector Extension 512-bit"
+#define SLEEF_RVV_VLEN (1 << 9)
+#elif CONFIG == 10
+// 1024-bit vector length
+#define ISANAME "RISC-V Vector Extension 1024-bit"
+#define SLEEF_RVV_VLEN (1 << 0)
+#elif CONFIG == 11
+// 2048-bit vector length
+#define ISANAME "RISC-V Vector Extension 2048-bit"
+#define SLEEF_RVV_VLEN (1 << 1)
 #else
-#define ISANAME "RISC-V Vector Extension VLEN=2^"#CONFIG
-#define SLEEF_RVV_VLEN (1 << CONFIG)
-#endif
-
-#ifndef CONFIG
-#error CONFIG macro not defined
+#error CONFIG macro invalid or not defined
 #endif
 
 #define ENABLE_SP
-#define ENABLE_FMA_DP
 #define ENABLE_DP
 
+#if CONFIG != 2
+#define ENABLE_FMA_SP
+#define ENABLE_FMA_DP
+#endif
+
 static INLINE int vavailability_i(int name) { return -1; }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -405,13 +420,33 @@ static INLINE vfloat vrec_vf_vf(vfloat d) {
 static INLINE vfloat vsqrt_vf_vf(vfloat d) {
   return __riscv_vfsqrt(d, VECTLENSP);
 }
-// fused multiply-add/subtract
+#if defined(ENABLE_FMA_SP)
+// Multiply accumulate: z = z + x * y
 static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
   return __riscv_vfmadd(x, y, z, VECTLENSP);
 }
+// Multiply subtract: z = z - x * y
 static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
   return __riscv_vfnmsub(x, y, z, VECTLENSP);
 }
+static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
+  return __riscv_vfmsub(x, y, z, VECTLENSP);
+}
+#else
+static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
+static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); }
+static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
+#endif
+// fused multiply add / sub
+static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) + z
+  return __riscv_vfmadd(x, y, z, VECTLENSP);
+}
+static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // -(x * y) + z
+  return __riscv_vfnmsub(x, y, z, VECTLENSP);
+}
+static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) - z
+  return __riscv_vfmsub(x, y, z, VECTLENSP);
+}
 // sign manipulation
 static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) {
   return __riscv_vfsgnjx(x, y, VECTLENSP);
@@ -782,13 +817,20 @@ static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) {
 static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) {
   return __riscv_vfmin(x, y, VECTLENDP);
 }
-// fused multiply add / sub
+#if defined(ENABLE_FMA_DP)
+// Multiply accumulate: z = z + x * y
 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
   return __riscv_vfmadd(x, y, z, VECTLENDP);
 }
+// Multiply subtract: x * y - z
 static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
   return __riscv_vfmsub(x, y, z, VECTLENDP);
 }
+#else
+static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
+#endif
+// fused multiply add / sub
 static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
   return __riscv_vfmadd(x, y, z, VECTLENDP);
 }
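The ENABLE_FMA_SP/ENABLE_FMA_DP split above is the whole point of the NOFMA variants: vmla and friends contract to a single fused instruction when FMA is allowed, and fall back to a separate multiply and add (two roundings) otherwise, so the two builds can disagree in the last bit. A minimal scalar illustration of that difference (plain C with math.h, not SLEEF code):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      double x = 1.0 / 3.0;               /* already rounded, so x*3 != 1 exactly */
      double fused   = fma(x, 3.0, -1.0); /* one rounding: keeps the residual */
      double unfused = x * 3.0 - 1.0;     /* two roundings: residual is lost */
      printf("fused = %g, unfused = %g\n", fused, unfused);
      /* typically prints: fused = -5.55112e-17, unfused = 0 */
      return 0;
    }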
From 549d2120448e1ed173b98b6fe8831a81d44051b4 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Wed, 15 Nov 2023 20:17:17 +0000
Subject: [PATCH 16/24] fixup! Fix CONFIG for RVV in helperrvv

---
 src/arch/helperrvv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 610c99f1..8f2eb204 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -24,11 +24,11 @@
 #elif CONFIG == 10
 // 1024-bit vector length
 #define ISANAME "RISC-V Vector Extension 1024-bit"
-#define SLEEF_RVV_VLEN (1 << 0)
+#define SLEEF_RVV_VLEN (1 << 10)
 #elif CONFIG == 11
 // 2048-bit vector length
 #define ISANAME "RISC-V Vector Extension 2048-bit"
-#define SLEEF_RVV_VLEN (1 << 1)
+#define SLEEF_RVV_VLEN (1 << 11)
 #else
 #error CONFIG macro invalid or not defined
 #endif

From e1b59d388395c36a5fa3324fd40973370c47fa78 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Wed, 15 Nov 2023 20:19:26 +0000
Subject: [PATCH 17/24] fixup! Fix CONFIG for RVV in helperrvv

---
 src/arch/helperrvv.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 8f2eb204..3090121c 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -13,6 +13,10 @@
 #if CONFIG == 1 || CONFIG == 2
 #define ISANAME "RISC-V Vector Extension with Min. VLEN"
 #define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#elif CONFIG == 7
+// 128-bit vector length
+#define ISANAME "RISC-V Vector Extension 128-bit"
+#define SLEEF_RVV_VLEN (1 << 7)
 #elif CONFIG == 8
 // 256-bit vector length
 #define ISANAME "RISC-V Vector Extension 256-bit"
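These two fixups repair PATCH 15, whose CONFIG 10 and 11 branches accidentally expanded to one- and two-bit "vectors" ((1 << 0) and (1 << 1)), and add the missing 128-bit case; the intended invariant is that CONFIG n, for n >= 7, encodes a fixed VLEN of 2^n bits. A compile-time guard that would have caught the typos, as a sketch (standard C11, not part of the patch; valid as of this point in the series, since PATCH 22 later redefines the macro in bytes):

    /* Sketch: for fixed-VLEN builds, SLEEF_RVV_VLEN must be 2^CONFIG bits. */
    #if CONFIG >= 7 && CONFIG <= 11
    _Static_assert(SLEEF_RVV_VLEN == (1 << CONFIG),
                   "SLEEF_RVV_VLEN must equal 2^CONFIG bits");
    #endif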
From 105ff56119d455958c8f48aabba130b4b62fef15 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Thu, 16 Nov 2023 13:32:31 +0000
Subject: [PATCH 18/24] Add tester3 for RISC-V

---
 .github/workflows/build_and_test.yml |  3 ++-
 CMakeLists.txt                       |  4 ++++
 src/libm-tester/CMakeLists.txt       |  8 ++++++
 src/libm-tester/tester3.c            | 40 +++++++++++++++++++++++++++-
 src/libm/CMakeLists.txt              | 16 +++++------
 5 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index e5e518d8..1953cbc5 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -19,13 +19,14 @@ env:
   LLVM_VERSION: "17"
   COMMON_CMAKE_FLAGS: |
     -DSLEEF_SHOW_CONFIG=1
-    -DDISABLE_SSL=ON
     -DBUILD_GNUABI_LIBS=ON
     -DBUILD_INLINE_HEADERS=ON
     -DBUILD_DFT=ON
     -DBUILD_QUAD=ON
     -DBUILD_SCALAR_LIB=ON
     -DBUILD_STATIC_TEST_BINS=ON
+    -DENFORCE_TESTER=ON
+    -DENFORCE_TESTER3=ON
 
 jobs:
   build-native:

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 298a2fee..6bd3ab90 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,6 +87,10 @@ set(COSTOVERRIDE_NEON32 2)
 set(COSTOVERRIDE_NEON32VFPV4 2)
 set(COSTOVERRIDE_SVE 10)
 set(COSTOVERRIDE_SVENOFMA 10)
+set(COSTOVERRIDE_RVVM1 10)
+set(COSTOVERRIDE_RVVM1NOFMA 10)
+set(COSTOVERRIDE_RVVM2 10)
+set(COSTOVERRIDE_RVVM2NOFMA 10)
 
 #

diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt
index 69f2ba6e..41d6f36f 100644
--- a/src/libm-tester/CMakeLists.txt
+++ b/src/libm-tester/CMakeLists.txt
@@ -27,6 +27,11 @@ set(TESTER3_DEFINITIONS_VXENOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SL
 set(TESTER3_DEFINITIONS_VXE2 ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2)
 set(TESTER3_DEFINITIONS_VXE2NOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2nofma)
 
+set(TESTER3_DEFINITIONS_RVVM1 ATR=finz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1 ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM1NOFMA ATR=cinz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1nofma ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM2 ATR=finz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2 ENABLE_RVVM2)
+set(TESTER3_DEFINITIONS_RVVM2NOFMA ATR=cinz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2nofma ENABLE_RVVM2)
+
 set(TESTER3_DEFINITIONS_PUREC_SCALAR ATR=cinz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purec)
 set(TESTER3_DEFINITIONS_PURECFMA_SCALAR ATR=finz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purecfma)
 
@@ -47,6 +52,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(TEST3_CINZ purec_scalar vxenofma vxe2nofma)
   set(TEST3_FINZ purecfma_scalar vxe vxe2)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+  set(TEST3_CINZ purec_scalar rvvm1nofma rvvm2nofma)
+  set(TEST3_FINZ purecfma_scalar rvvm1 rvvm2)
 endif()
 
 #

diff --git a/src/libm-tester/tester3.c b/src/libm-tester/tester3.c
index f4b27e56..3027dff3 100644
--- a/src/libm-tester/tester3.c
+++ b/src/libm-tester/tester3.c
@@ -101,6 +101,44 @@ static INLINE __attribute__((vector_size(16))) float setSLEEF_VECTOR_FLOAT(float
 static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float v, int r) { return unifyValuef(v[r & 3]); }
 #endif
 
+#if __riscv && __riscv_v
+
+#if defined(ENABLE_RVVM1)
+#define VECTLENSP (1 * __riscv_v_min_vlen / 32)
+#define VECTLENDP (1 * __riscv_v_min_vlen / 64)
+
+static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
+static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m1_t setvfloat64m1_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m1(a, VECTLENDP); }
+static INLINE double getvfloat64m1_t(vfloat64m1_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m1_t vf2getx_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 0); }
+static vfloat32m1_t vf2gety_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 1); }
+static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 0); }
+static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
+
+#elif defined(ENABLE_RVVM2)
+#define VECTLENSP (2 * __riscv_v_min_vlen / 32)
+#define VECTLENDP (2 * __riscv_v_min_vlen / 64)
+
+static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
+static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m2_t setvfloat64m2_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m2(a, VECTLENDP); }
+static INLINE double getvfloat64m2_t(vfloat64m2_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m2_t vf2getx_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 0); }
+static vfloat32m2_t vf2gety_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 1); }
+static vfloat64m2_t vd2getx_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 0); }
+static vfloat64m2_t vd2gety_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 1); }
+
+#else
+#error "unknown RVV"
+#endif
+
+#undef VECTLENSP
+#undef VECTLENDP
+#endif
+
 //
 // ATR = cinz_, NAME = sin, TYPE = d2, ULP = u35, EXT = sse2
@@ -110,7 +148,7 @@ static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float
 #define SET(TYPE) set ## TYPE
 #define GET(TYPE) get ## TYPE
 
-#ifndef __ARM_FEATURE_SVE
+#if !defined(__ARM_FEATURE_SVE) && !(defined(__riscv) && defined(__riscv_v))
 static DPTYPE vd2getx_vd_vd2(TYPE2(DPTYPE) v) { return v.x; }
 static DPTYPE vd2gety_vd_vd2(TYPE2(DPTYPE) v) { return v.y; }
 static SPTYPE vf2getx_vf_vf2(TYPE2(SPTYPE) v) { return v.x; }

diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt
index 35b12734..a0e4029d 100644
--- a/src/libm/CMakeLists.txt
+++ b/src/libm/CMakeLists.txt
@@ -107,10 +107,10 @@ command_arguments(HEADER_PARAMS_VXENOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE"
 command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2)
 command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma)
 
-command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1)
-command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v m1nofma)
-command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2)
-command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v m2nofma)
+command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1)
+command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1nofma)
+command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2)
+command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2nofma)
 
 command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__)
 command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec)
@@ -158,10 +158,10 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3
 # the "x" token of VLA SVE vector functions.
 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)
 
-command_arguments(RENAME_PARAMS_RVVM1 finz_ x x m1)
-command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x m1nofma)
-command_arguments(RENAME_PARAMS_RVVM2 finz_ x x m2)
-command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x m2nofma)
+command_arguments(RENAME_PARAMS_RVVM1 finz_ x x rvvm1)
+command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x rvvm1nofma)
+command_arguments(RENAME_PARAMS_RVVM2 finz_ x x rvvm2)
+command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x rvvm2nofma)
 
 # ALIAS_PARAMS
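In tester3, each vector extension only has to provide a pair of lane accessors: setTYPE(d, r) plants a known scalar into lane r of an otherwise randomized vector, and getTYPE(v, r) reads that lane back after the function under test has run, so the shared driver never touches vector types directly. A scalar sketch of the same round-trip (plain C, with an array standing in for the RVV register; assumes the lane count is a power of two, as it is above):

    #include <string.h>

    #define VECTLEN 4              /* stand-in for the vlen-derived lane count */
    static float vecbuf[VECTLEN];  /* stand-in for a vfloat32m1_t */

    static void set_lane(float d, int r) {
      memset(vecbuf, 0, sizeof(vecbuf)); /* the real code randomizes via memrand() */
      vecbuf[r & (VECTLEN - 1)] = d;     /* mask is valid because VECTLEN is 2^n */
    }

    static float get_lane(int r) {
      return vecbuf[r & (VECTLEN - 1)];
    }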
From 562a69ced35057a413e84df66065e829f762d5a2 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Thu, 16 Nov 2023 13:50:13 +0000
Subject: [PATCH 19/24] Add sysroot for dependencies

---
 .github/workflows/build_and_test.yml | 34 ++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 1953cbc5..d11cc549 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -137,9 +137,13 @@ jobs:
         compiler: [gcc, llvm]
         include:
         - arch: armhf
+          binfmt: arm
          gnupkg: -arm-linux-gnueabihf
         - arch: ppc64el
+          binfmt: ppc64le
           gnupkg: -powerpc64le-linux-gnu
+        - arch: aarch64
+          debarch: arm64
         exclude:
         # Only GCC trunk supports the RISC-V V intrinsics and https://github.com/riscv-collab/riscv-gnu-toolchain
         # doesn't track a recent enough version yet
@@ -155,7 +159,7 @@
     - name: Install dependencies
       run: |
        sudo apt-get update -y -qq
-        sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev
+        sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev debootstrap
 
     # Needed for llvm builds as well for target libraries
     - name: Install gcc
@@ -171,6 +175,30 @@
        rm llvm.sh
       if: ${{ matrix.compiler == 'llvm' }}
 
+    - name: Setup QEMU
+      uses: docker/setup-qemu-action@v3.0.0
+      with:
+        platforms: ${{ matrix.binfmt || matrix.arch }}
+
+    - name: Check sysroot cache
+      id: check-sysroot-cache
+      uses: actions/cache@v3
+      with:
+        path: sysroot
+        key: sysroot-${{ matrix.arch }}-${{ hashFiles('./.github/workflows/build_and_test.yml') }}
+
+    - name: Create sysroot
+      run: |
+        sudo debootstrap --arch=${{ matrix.debarch || matrix.arch }} --verbose --include=fakeroot,symlinks,libmpfr-dev,libssl-dev --resolve-deps --variant=minbase --components=main,universe focal sysroot
+        # Remove unused files to minimize cache
+        sudo chroot sysroot symlinks -cr .
+        sudo chown ${USER} -R sysroot
+        rm -rf sysroot/{dev,proc,run,sys,var}
+        rm -rf sysroot/usr/{sbin,bin,share}
+        rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd}
+        rm -rf sysroot/usr/libexec/gcc
+      if: steps.check-sysroot-cache.outputs.cache-hit != 'true'
+
     - name: Download build-native-${{ matrix.compiler }} artifacts
       uses: actions/download-artifact@v3
       with:
@@ -208,6 +236,7 @@
         cmake -S . -B _build-${{ matrix.arch }} -GNinja \
           -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \
           -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}-${{ matrix.compiler }}.cmake \
+          -DCMAKE_SYSROOT=$(pwd)/sysroot \
           -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \
           ${COMMON_CMAKE_FLAGS} \
           ${EXTRA_CMAKE_FLAGS}
@@ -312,7 +341,8 @@
       with:
         persist-credentials: false
 
-    - uses: docker/setup-qemu-action@v3.0.0
+    - name: Setup QEMU
+      uses: docker/setup-qemu-action@v3.0.0
       with:
         platforms: ${{ matrix.binfmt || matrix.arch }}
From 7de9fd489c16d35607b5fccc18d5bc6003fd1cb4 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Fri, 17 Nov 2023 14:09:37 +0000
Subject: [PATCH 20/24] Fix rounding functions

---
 src/arch/helperrvv.h | 41 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 3090121c..fff0b3fa 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -140,6 +140,9 @@ typedef vint32m4_t dfi_t;
 #define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2
 #define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1
 #define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m1
+#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m1_rm
+#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m1
+#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m1_rm
 #define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m1
 #define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP)
 #define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32mf2
@@ -178,6 +181,9 @@ typedef vint32m4_t dfi_t;
 #define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1
 #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1
 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2
+#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32mf2_rm
+#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m1
+#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m1_rm
 
 #elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA)
 
@@ -223,6 +229,9 @@ typedef vint32m8_t dfi_t;
 #define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m4
 #define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2
 #define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m2
+#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m2_rm
+#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m2
+#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m2_rm
 #define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m2
 #define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP)
 #define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32m1
@@ -261,6 +270,9 @@ typedef vint32m8_t dfi_t;
 #define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1
 #define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2
 #define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1
+#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32m1_rm
+#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m2
+#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m2_rm
 
 #else
 #error "unknown rvv lmul"
@@ -344,13 +356,7 @@ static INLINE vfloat vcast_vf_f(float f) {
   return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP);
 }
 static INLINE vfloat vrint_vf_vf(vfloat vd) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP);
-  half = __riscv_vfsgnj(half, vd, VECTLENSP);
-  vfloat res = __riscv_vfadd(vd, half, VECTLENSP);
-  vint2 i = __riscv_vfcvt_rtz_x(res, VECTLENSP);
-  return __riscv_vfcvt_f(i, VECTLENSP);
+  return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_VXRM_RNU, VECTLENSP), VECTLENSP);
 }
 static INLINE vfloat vcast_vf_vi2(vint2 vi) {
   return __riscv_vfcvt_f(vi, VECTLENSP);
@@ -359,12 +365,7 @@ static INLINE vint2 vcast_vi2_i(int i) {
   return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP);
 }
 static INLINE vint2 vrint_vi2_vf(vfloat vf) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vfloat half = SLEEF_RVV_SP_VCAST_VF_F(0.5, VECTLENSP);
-  half = __riscv_vfsgnj(half, vf, VECTLENSP);
-  vfloat res = __riscv_vfadd(vf, half, VECTLENSP);
-  return __riscv_vfcvt_rtz_x(res, VECTLENSP);
+  return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_VXRM_RNU, VECTLENSP);
 }
 static INLINE vint2 vtruncate_vi2_vf(vfloat vf) {
   return __riscv_vfcvt_rtz_x(vf, VECTLENSP);
@@ -742,20 +743,10 @@ static INLINE vint vcast_vi_i(int32_t i) {
   return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP);
 }
 static INLINE vint vrint_vi_vd(vdouble vd) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP);
-  half = __riscv_vfsgnj(half, vd, VECTLENDP);
-  vdouble res = __riscv_vfadd(vd, half, VECTLENDP);
-  return __riscv_vfncvt_rtz_x(res, VECTLENDP);
+  return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_VXRM_RNU, VECTLENDP);
 }
 static INLINE vdouble vrint_vd_vd(vdouble vd) {
-  // It is not currently possible to safely set frm for intrinsics,
-  // so emulate round-to-nearest behavior
-  vdouble half = SLEEF_RVV_DP_VCAST_VD_D(0.5, VECTLENDP);
-  half = __riscv_vfsgnj(half, vd, VECTLENDP);
-  vdouble res = __riscv_vfadd(vd, half, VECTLENDP);
-  return __riscv_vfwcvt_f(__riscv_vfncvt_rtz_x(res, VECTLENDP), VECTLENDP);
+  return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_VXRM_RNU, VECTLENDP), VECTLENDP);
 }
 static INLINE vint vtruncate_vi_vd(vdouble vd) {
   return __riscv_vfncvt_rtz_x(vd, VECTLENDP);
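The rewrite above replaces the add-half-and-truncate emulation with conversions that carry an explicit rounding-mode operand. Besides being shorter, the two approaches genuinely differ: the emulation rounds halfway cases away from zero, while a round-to-nearest conversion under the default mode rounds them to even, and the x + 0.5 addition itself can round and push a value just below .5 over the boundary. A scalar illustration (plain C, not SLEEF code):

    #include <math.h>
    #include <stdio.h>

    /* The removed emulation, scalarized: copysign is the vfsgnj step,
     * trunc is the vfcvt_rtz step. */
    static double emulate_round(double x) {
      return trunc(x + copysign(0.5, x));
    }

    int main(void) {
      /* halfway case: away-from-zero vs round-to-nearest-even */
      printf("%g %g\n", emulate_round(2.5), rint(2.5)); /* 3 vs 2 */
      /* double rounding: x + 0.5 rounds up to 1.0, so trunc gives 1, not 0 */
      double x = nextafter(0.5, 0.0);
      printf("%g %g\n", emulate_round(x), rint(x));     /* 1 vs 0 */
      return 0;
    }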
From 6a3551cd21a1eaa4937aab14bad33b1072c98e51 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Fri, 17 Nov 2023 23:28:59 +0000
Subject: [PATCH 21/24] Use correct rounding mode

---
 src/arch/helperrvv.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index fff0b3fa..10c734e2 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -356,7 +356,7 @@ static INLINE vfloat vcast_vf_f(float f) {
   return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP);
 }
 static INLINE vfloat vrint_vf_vf(vfloat vd) {
-  return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_VXRM_RNU, VECTLENSP), VECTLENSP);
+  return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_FRM_RNE, VECTLENSP), VECTLENSP);
 }
 static INLINE vfloat vcast_vf_vi2(vint2 vi) {
   return __riscv_vfcvt_f(vi, VECTLENSP);
@@ -365,7 +365,7 @@ static INLINE vint2 vcast_vi2_i(int i) {
   return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP);
 }
 static INLINE vint2 vrint_vi2_vf(vfloat vf) {
-  return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_VXRM_RNU, VECTLENSP);
+  return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_FRM_RNE, VECTLENSP);
 }
 static INLINE vint2 vtruncate_vi2_vf(vfloat vf) {
   return __riscv_vfcvt_rtz_x(vf, VECTLENSP);
@@ -743,10 +743,10 @@ static INLINE vint vcast_vi_i(int32_t i) {
   return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP);
 }
 static INLINE vint vrint_vi_vd(vdouble vd) {
-  return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_VXRM_RNU, VECTLENDP);
+  return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_FRM_RNE, VECTLENDP);
 }
 static INLINE vdouble vrint_vd_vd(vdouble vd) {
-  return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_VXRM_RNU, VECTLENDP), VECTLENDP);
+  return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_FRM_RNE, VECTLENDP), VECTLENDP);
 }
 static INLINE vint vtruncate_vi_vd(vdouble vd) {
   return __riscv_vfncvt_rtz_x(vd, VECTLENDP);
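The one-token change above is easy to miss but important: the __RISCV_VXRM_* values select rounding for the fixed-point unit (the vxrm CSR), and RNU, round-to-nearest-up, does not exist as a floating-point mode at all, whereas the _rm floating-point conversions expect an __RISCV_FRM_* value for the frm CSR, where RNE is IEEE round-to-nearest, ties-to-even. A minimal sketch of the corrected usage (assumes a toolchain providing the RVV rounding-mode intrinsics used by the patch):

    #include <riscv_vector.h>

    /* __RISCV_FRM_RNE = IEEE round-to-nearest-even, the mode vrint needs. */
    vint32m1_t round_nearest_even(vfloat32m1_t v, size_t vl) {
      return __riscv_vfcvt_x_f_v_i32m1_rm(v, __RISCV_FRM_RNE, vl);
    }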
From ca8e731c30fb23dc644575858f675d82c9495560 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Sat, 18 Nov 2023 22:23:01 +0100
Subject: [PATCH 22/24] Use __riscv_vlenb

---
 CMakeLists.txt            |  5 +++--
 src/arch/helperrvv.h      | 20 ++++++++++----------
 src/libm-tester/tester3.c |  8 ++++----
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6bd3ab90..c89fa6c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,8 +89,9 @@ set(COSTOVERRIDE_SVE 10)
 set(COSTOVERRIDE_SVENOFMA 10)
 set(COSTOVERRIDE_RVVM1 10)
 set(COSTOVERRIDE_RVVM1NOFMA 10)
-set(COSTOVERRIDE_RVVM2 10)
-set(COSTOVERRIDE_RVVM2NOFMA 10)
+set(COSTOVERRIDE_RVVM2 20)
+set(COSTOVERRIDE_RVVM2NOFMA 20
+)
 
 #

diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h
index 10c734e2..8450e51a 100644
--- a/src/arch/helperrvv.h
+++ b/src/arch/helperrvv.h
@@ -12,27 +12,27 @@
 #if CONFIG == 1 || CONFIG == 2
 #define ISANAME "RISC-V Vector Extension with Min. VLEN"
-#define SLEEF_RVV_VLEN __riscv_v_min_vlen
+#define SLEEF_RVV_VLEN __riscv_vlenb()
 #elif CONFIG == 7
 // 128-bit vector length
 #define ISANAME "RISC-V Vector Extension 128-bit"
-#define SLEEF_RVV_VLEN (1 << 7)
+#define SLEEF_RVV_VLEN ((1 << 7) / 8)
 #elif CONFIG == 8
 // 256-bit vector length
 #define ISANAME "RISC-V Vector Extension 256-bit"
-#define SLEEF_RVV_VLEN (1 << 8)
+#define SLEEF_RVV_VLEN ((1 << 8) / 8)
 #elif CONFIG == 9
 // 512-bit vector length
 #define ISANAME "RISC-V Vector Extension 512-bit"
-#define SLEEF_RVV_VLEN (1 << 9)
+#define SLEEF_RVV_VLEN ((1 << 9) / 8)
 #elif CONFIG == 10
 // 1024-bit vector length
 #define ISANAME "RISC-V Vector Extension 1024-bit"
-#define SLEEF_RVV_VLEN (1 << 10)
+#define SLEEF_RVV_VLEN ((1 << 10) / 8)
 #elif CONFIG == 11
 // 2048-bit vector length
 #define ISANAME "RISC-V Vector Extension 2048-bit"
-#define SLEEF_RVV_VLEN (1 << 11)
+#define SLEEF_RVV_VLEN ((1 << 11) / 8)
 #else
 #error CONFIG macro invalid or not defined
 #endif
@@ -114,8 +114,8 @@ typedef vint32m2_t fi_t;
 typedef vint32m4_t dfi_t;
 #define SLEEF_RVV_SP_LMUL 1
 #define SLEEF_RVV_DP_LMUL 1
-#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32)
-#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64)
+#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float))
+#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double))
 #define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m1
 #define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m1
 #define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m1
@@ -203,8 +203,8 @@ typedef vint32m4_t fi_t;
 typedef vint32m8_t dfi_t;
 #define SLEEF_RVV_SP_LMUL 2
 #define SLEEF_RVV_DP_LMUL 2
-#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / 32)
-#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / 64)
+#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float))
+#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double))
 #define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m2
 #define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m2
 #define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m2

diff --git a/src/libm-tester/tester3.c b/src/libm-tester/tester3.c
index 3027dff3..a55404ed 100644
--- a/src/libm-tester/tester3.c
+++ b/src/libm-tester/tester3.c
@@ -104,8 +104,8 @@ static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float
 #if __riscv && __riscv_v
 
 #if defined(ENABLE_RVVM1)
-#define VECTLENSP (1 * __riscv_v_min_vlen / 32)
-#define VECTLENDP (1 * __riscv_v_min_vlen / 64)
+#define VECTLENSP (1 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (1 * __riscv_vlenb() / sizeof(double))
 
 static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
 static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
@@ -118,8 +118,8 @@ static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v
 static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
 
 #elif defined(ENABLE_RVVM2)
-#define VECTLENSP (2 * __riscv_v_min_vlen / 32)
-#define VECTLENDP (2 * __riscv_v_min_vlen / 64)
+#define VECTLENSP (2 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (2 * __riscv_vlenb() / sizeof(double))
 
 static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
 static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
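Switching from __riscv_v_min_vlen to __riscv_vlenb() changes both the unit and the binding time: vlenb is the actual vector register width of the running core in bytes, not the compile-time minimum in bits, which is why the fixed-CONFIG branches gain a / 8 and the element counts now divide by sizeof. A small sketch of the resulting arithmetic (LMUL = 1 case; assumes a toolchain exposing __riscv_vlenb(), as the patch does):

    #include <riscv_vector.h>
    #include <stdio.h>

    int main(void) {
      size_t vlenb = __riscv_vlenb();                   /* bytes, e.g. 16 if VLEN=128 */
      printf("VLEN = %zu bits\n", vlenb * 8);
      printf("m1 float lanes  = %zu\n", vlenb / sizeof(float));  /* 4 when VLEN=128 */
      printf("m1 double lanes = %zu\n", vlenb / sizeof(double)); /* 2 when VLEN=128 */
      return 0;
    }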
From 12a174beaf5ca9b20168ebbb968f9d9c842fd505 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Sat, 18 Nov 2023 23:44:07 +0000
Subject: [PATCH 23/24] fixup! Use __riscv_vlenb

---
 Configure.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Configure.cmake b/Configure.cmake
index ac64f16f..58fe5403 100644
--- a/Configure.cmake
+++ b/Configure.cmake
@@ -634,7 +634,7 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1)
   CHECK_C_SOURCE_COMPILES("
   #include <riscv_vector.h>
   int main() {
-    vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_v_min_vlen / 32); }"
+    vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_vlenb() * 8 / 32); }"
     COMPILER_SUPPORTS_RVVM1)
 
   if(COMPILER_SUPPORTS_RVVM1)
@@ -656,7 +656,7 @@ if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2)
   CHECK_C_SOURCE_COMPILES("
   #include <riscv_vector.h>
   int main() {
-    vint32m2_t r = __riscv_vmv_v_x_i32m2(1, __riscv_v_min_vlen / 32); }"
+    vint32m2_t r = __riscv_vmv_v_x_i32m2(1, 2 * __riscv_vlenb() * 8 / 32); }"
     COMPILER_SUPPORTS_RVVM2)
 
   if(COMPILER_SUPPORTS_RVVM2)

From 19ee07e6b371a9dbc84a223c6e05d26bdf0aeac8 Mon Sep 17 00:00:00 2001
From: Ludovic Henry
Date: Mon, 20 Nov 2023 10:34:19 +0000
Subject: [PATCH 24/24] Fix indent

---
 CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c89fa6c6..c38219f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,8 +90,7 @@ set(COSTOVERRIDE_SVENOFMA 10)
 set(COSTOVERRIDE_RVVM1 10)
 set(COSTOVERRIDE_RVVM1NOFMA 10)
 set(COSTOVERRIDE_RVVM2 20)
-set(COSTOVERRIDE_RVVM2NOFMA 20
-)
+set(COSTOVERRIDE_RVVM2NOFMA 20)
 
 #
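With the compile probes and cost overrides in place, the RVV targets build and test like any other extension. As a closing illustration, a hypothetical smoke test against the resulting library; the symbol name below follows the HEADER_PARAMS_RVVM1 pattern above (finz_ attribute, dx type spec, rvvm1 suffix), but check the generated sleef.h for the exact spelling before relying on it:

    #include <riscv_vector.h>
    #include <sleef.h>  /* assumed generated/installed header */

    int main(void) {
      size_t vl = __riscv_vlenb() / sizeof(double);      /* m1 double lanes */
      vfloat64m1_t x = __riscv_vfmv_v_f_f64m1(0.5, vl);  /* broadcast 0.5 */
      vfloat64m1_t y = Sleef_finz_sindx_u10rvvm1(x);     /* hypothetical name */
      (void)y;
      return 0;
    }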