diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 3540b600..d11cc549 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -7,46 +7,76 @@ on:
   push:
   pull_request:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 permissions:
   contents: read
 
 env:
+  GCC_VERSION: "11"
+  LLVM_VERSION: "17"
   COMMON_CMAKE_FLAGS: |
     -DSLEEF_SHOW_CONFIG=1
-    -DDISABLE_SSL=ON
     -DBUILD_GNUABI_LIBS=ON
     -DBUILD_INLINE_HEADERS=ON
     -DBUILD_DFT=ON
     -DBUILD_QUAD=ON
     -DBUILD_SCALAR_LIB=ON
     -DBUILD_STATIC_TEST_BINS=ON
+    -DENFORCE_TESTER=ON
+    -DENFORCE_TESTER3=ON
 
 jobs:
   build-native:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [gcc, llvm]
+
+    name: build-native-${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4.1.1
        with:
          persist-credentials: false

      - name: Install dependencies
-        run: sudo apt-get update -y -qq && sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev
+        run: |
+          sudo apt-get update -y -qq
+          sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev
+
+      # Needed for llvm builds as well, to provide the target libraries
+      - name: Install gcc
+        run: |
+          sudo apt-get install -y -qq gcc-${GCC_VERSION}
+
+      - name: Install llvm
+        run: |
+          curl -o llvm.sh https://apt.llvm.org/llvm.sh
+          chmod u+x llvm.sh
+          sudo ./llvm.sh ${LLVM_VERSION}
+          sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang
+          rm llvm.sh
+        if: ${{ matrix.compiler == 'llvm' }}

      - name: Build native
+        shell: bash -ex -o pipefail {0}
        run: |
-          set -x
          EXTRA_CMAKE_FLAGS="-DENFORCE_SSE2=ON -DENFORCE_SSE4=ON -DENFORCE_AVX=ON -DENFORCE_AVX2=ON -DENFORCE_AVX512F=ON -DENFORCE_FMA4=ON"
          cmake -S . -B _build-native -GNinja \
            -DCMAKE_INSTALL_PREFIX=$(pwd)/_install-native \
+            -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-native-${{ matrix.compiler }}.cmake \
            ${COMMON_CMAKE_FLAGS} \
            ${EXTRA_CMAKE_FLAGS}
          cmake --build _build-native
          cmake --install _build-native

-      - name: Upload build-native artifacts
+      - name: Upload build-native-${{ matrix.compiler }} artifacts
        uses: actions/upload-artifact@v3
        with:
-          name: build-native
+          name: build-native-${{ matrix.compiler }}
          path: |
            _build-*
            _install-*
@@ -55,6 +85,12 @@ jobs:
  test-native:
    runs-on: ubuntu-latest
    needs: [build-native]
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [gcc, llvm]
+
+    name: test-native-${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4.1.1
        with:
          persist-credentials: false
@@ -67,12 +103,12 @@ jobs:
        run: |
          cat /proc/cpuinfo

-      - name: Download build-native artifacts
+      - name: Download build-native-${{ matrix.compiler }} artifacts
        uses: actions/download-artifact@v3
        with:
-          name: build-native
+          name: build-native-${{ matrix.compiler }}

-      - name: Fix build-native permissions
+      - name: Fix _build-native permissions
        run: |
          chmod +x _build-native/bin/*

@@ -97,19 +133,24 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
+        arch: [aarch64, armhf, ppc64el, s390x, riscv64]
+        compiler: [gcc, llvm]
        include:
-          # AArch64
-          - arch: aarch64
-          # Aarch32
          - arch: armhf
-            package: -arm-linux-gnueabihf
-          # PPC64
+            binfmt: arm
+            gnupkg: -arm-linux-gnueabihf
          - arch: ppc64el
-            package: -powerpc64le-linux-gnu
-          # IBM Z
-          - arch: s390x
-
-    name: build-${{ matrix.arch }}
+            binfmt: ppc64le
+            gnupkg: -powerpc64le-linux-gnu
+          - arch: aarch64
+            debarch: arm64
+        exclude:
+          # Only GCC trunk supports the RISC-V V intrinsics, and
+          # https://github.com/riscv-collab/riscv-gnu-toolchain doesn't track a recent enough version yet
+          - arch: riscv64
+            compiler: gcc
+
+    name: build-${{ matrix.arch }}-${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4.1.1
        with:
          persist-credentials: false
@@ -118,20 +159,58 @@ jobs:
      - name: Install dependencies
        run: |
          sudo apt-get update -y -qq
-          sudo apt-get install -y -qq build-essential clang curl ninja-build libgmp-dev libmpfr-dev gcc${{ matrix.package || format('-{0}-linux-gnu', matrix.arch) }}
+          sudo apt-get install -y -qq build-essential curl ninja-build libgmp-dev libmpfr-dev debootstrap
+
+      # Needed for llvm builds as well, to provide the target libraries
+      - name: Install gcc
+        run: |
+          sudo apt-get install -y -qq gcc-${GCC_VERSION}${{ matrix.gnupkg || format('-{0}-linux-gnu', matrix.arch) }}

-      - name: Download build-native artifacts
+      - name: Install llvm
+        run: |
+          curl -o llvm.sh https://apt.llvm.org/llvm.sh
+          chmod u+x llvm.sh
+          sudo ./llvm.sh ${LLVM_VERSION}
+          sudo ln -srf $(which clang-${LLVM_VERSION}) /usr/bin/clang
+          rm llvm.sh
+        if: ${{ matrix.compiler == 'llvm' }}
+
+      - name: Setup QEMU
+        uses: docker/setup-qemu-action@v3.0.0
+        with:
+          platforms: ${{ matrix.binfmt || matrix.arch }}
+
+      - name: Check sysroot cache
+        id: check-sysroot-cache
+        uses: actions/cache@v3
+        with:
+          path: sysroot
+          key: sysroot-${{ matrix.arch }}-${{ hashFiles('./.github/workflows/build_and_test.yml') }}
+
+      - name: Create sysroot
+        run: |
+          sudo debootstrap --arch=${{ matrix.debarch || matrix.arch }} --verbose --include=fakeroot,symlinks,libmpfr-dev,libssl-dev --resolve-deps --variant=minbase --components=main,universe focal sysroot
+          # Remove unused files to minimize cache
+          sudo chroot sysroot symlinks -cr .
+          sudo chown ${USER} -R sysroot
+          rm -rf sysroot/{dev,proc,run,sys,var}
+          rm -rf sysroot/usr/{sbin,bin,share}
+          rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd}
+          rm -rf sysroot/usr/libexec/gcc
+        if: steps.check-sysroot-cache.outputs.cache-hit != 'true'
+
+      - name: Download build-native-${{ matrix.compiler }} artifacts
        uses: actions/download-artifact@v3
        with:
-          name: build-native
+          name: build-native-${{ matrix.compiler }}

-      - name: Fix build-native permissions
+      - name: Fix _build-native permissions
        run: |
          chmod +x _build-native/bin/*

      - name: Build ${{ matrix.arch }}
+        shell: bash -ex -o pipefail {0}
        run: |
-          set -x
          EXTRA_CMAKE_FLAGS=""
          if [[ ${{ matrix.arch }} = "aarch64" ]]; then
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_SVE=ON"
@@ -142,22 +221,32 @@ jobs:
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VSX=ON -DENFORCE_VSX3=ON"
          elif [[ ${{ matrix.arch }} = "s390x" ]]; then
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_VXE=ON"
-            # Disable VXE2 support, QEMU doesn't support it
+            # Disable VXE2 support; QEMU doesn't support some instructions generated by gcc or llvm
            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DDISABLE_VXE2=ON"
+          elif [[ ${{ matrix.arch }} = "riscv64" ]]; then
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DENFORCE_RVVM1=ON -DENFORCE_RVVM2=ON"
+            # Disable inline headers; they just don't compile on riscv64
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_INLINE_HEADERS=OFF"
+            # Disable dft; it fails with a linker error on `cexp`
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_DFT=OFF"
+            # Disable quad; it's missing the `Sleef_quad` function
+            EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DBUILD_QUAD=OFF"
          fi
+
          cmake -S . \
-B _build-${{ matrix.arch }} -GNinja \ -DCMAKE_INSTALL_PREFIX="$(pwd)/_install-${{ matrix.arch }}" \ - -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}.cmake \ + -DCMAKE_TOOLCHAIN_FILE=$(pwd)/travis/toolchain-${{ matrix.arch }}-${{ matrix.compiler }}.cmake \ + -DCMAKE_SYSROOT=$(pwd)/sysroot \ -DNATIVE_BUILD_DIR="$(pwd)/_build-native" \ ${COMMON_CMAKE_FLAGS} \ ${EXTRA_CMAKE_FLAGS} cmake --build _build-${{ matrix.arch }} cmake --install _build-${{ matrix.arch }} - - name: Upload build-${{ matrix.arch }} artifacts + - name: Upload build-${{ matrix.arch }}-${{ matrix.compiler }} artifacts uses: actions/upload-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} path: | _build-${{ matrix.arch }} _install-${{ matrix.arch }} @@ -172,32 +261,88 @@ jobs: include: # AArch64 - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=off" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve128=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve256=on" - arch: aarch64 + compiler: gcc qemu_cpu: "max,sve=on,sve512=on" + # Some tests fail when compiled with LLVM only + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=off" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve128=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve256=on" + # - arch: aarch64 + # compiler: llvm + # qemu_cpu: "max,sve=on,sve512=on" # Aarch32 - arch: armhf + compiler: gcc + binfmt: arm + qemu_cpu: "max" + - arch: armhf + compiler: llvm binfmt: arm qemu_cpu: "max" # PPC64 - arch: ppc64el + compiler: gcc + binfmt: ppc64le + qemu_cpu: "power10" + - arch: ppc64el + compiler: llvm binfmt: ppc64le qemu_cpu: "power10" # IBM Z # TODO: figure out qemu_cpu variable to make tests pass on QEMU - arch: s390x - - name: "test-${{ matrix.arch }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" + compiler: gcc + - arch: s390x + compiler: llvm + # RISC-V + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" + # - arch: riscv64 + # compiler: gcc + # qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=false" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0" + - arch: riscv64 + compiler: llvm + qemu_cpu: "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" + + name: "test-${{ matrix.arch }}-${{ matrix.compiler }} (qemu_cpu: \"${{ matrix.qemu_cpu }}\")" steps: - uses: actions/checkout@v4.1.1 with: persist-credentials: false - - uses: docker/setup-qemu-action@v3.0.0 + - name: Setup QEMU + uses: docker/setup-qemu-action@v3.0.0 with: platforms: ${{ matrix.binfmt || matrix.arch }} @@ -208,20 +353,19 @@ jobs: run: | cat /proc/cpuinfo - - name: Download build-native artifacts + - name: Download build-native-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-native + name: build-native-${{ matrix.compiler }} - - name: Download build-${{ matrix.arch }} artifacts + - name: Download build-${{ matrix.arch 
}}-${{ matrix.compiler }} artifacts uses: actions/download-artifact@v3 with: - name: build-${{ matrix.arch }} + name: build-${{ matrix.arch }}-${{ matrix.compiler }} - - name: Fix build-native and _build-${{ matrix.arch }} permissions + - name: Fix _build-native and _build-${{ matrix.arch }} permissions run: | - chmod +x _build-native/bin/* - chmod +x _build-${{ matrix.arch }}/bin/* + chmod +x _build-native/bin/* _build-${{ matrix.arch }}/bin/* - name: Test ${{ matrix.arch }} env: @@ -233,10 +377,10 @@ jobs: cd _build-${{ matrix.arch }} ctest -j$(nproc) - - name: Upload test-${{ matrix.arch }}-${{ strategy.job-index }} artifacts + - name: Upload test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} artifacts uses: actions/upload-artifact@v3 with: - name: test-${{ matrix.arch }}-${{ strategy.job-index }} + name: test-${{ matrix.arch }}-${{ matrix.compiler }}-${{ strategy.job-index }} path: | _build-${{ matrix.arch }}/Testing if: always() diff --git a/CMakeLists.txt b/CMakeLists.txt index ec9e04e3..c38219f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,7 @@ set(SLEEF_ALL_SUPPORTED_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -56,6 +57,7 @@ set(SLEEF_SUPPORTED_LIBM_EXTENSIONS NEON32 NEON32VFPV4 # Aarch32 VSX VSXNOFMA VSX3 VSX3NOFMA # PPC64 VXE VXENOFMA VXE2 VXE2NOFMA # IBM Z + RVVM1NOFMA RVVM1 RVVM2NOFMA RVVM2 # RISC-V Vectors PUREC_SCALAR PURECFMA_SCALAR # Generic type CACHE STRING "List of SIMD architectures supported by libsleef." ) @@ -85,6 +87,10 @@ set(COSTOVERRIDE_NEON32 2) set(COSTOVERRIDE_NEON32VFPV4 2) set(COSTOVERRIDE_SVE 10) set(COSTOVERRIDE_SVENOFMA 10) +set(COSTOVERRIDE_RVVM1 10) +set(COSTOVERRIDE_RVVM1NOFMA 10) +set(COSTOVERRIDE_RVVM2 20) +set(COSTOVERRIDE_RVVM2NOFMA 20) # diff --git a/Configure.cmake b/Configure.cmake index 63d2c638..58fe5403 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -118,7 +118,10 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") + set(SLEEF_ARCH_RISCV64 ON CACHE INTERNAL "True for RISCV64 architecture.") endif() set(COMPILER_SUPPORTS_PUREC_SCALAR 1) @@ -163,6 +166,11 @@ set(CLANG_FLAGS_ENABLE_VXE "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_VXENOFMA "-march=z14;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2 "-march=z15;-mzvector") set(CLANG_FLAGS_ENABLE_VXE2NOFMA "-march=z15;-mzvector") +# RISC-V +set(CLANG_FLAGS_ENABLE_RVVM1 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM1NOFMA "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2 "-march=rv64gcv_zba_zbb_zbs") +set(CLANG_FLAGS_ENABLE_RVVM2NOFMA "-march=rv64gcv_zba_zbb_zbs") set(FLAGS_OTHERS "") @@ -616,6 +624,50 @@ if (ENFORCE_VXE2 AND NOT COMPILER_SUPPORTS_VXE2) message(FATAL_ERROR "ENFORCE_VXE2 is specified and that feature is disabled or not supported by the compiler") endif() +# RVVM1 + +option(DISABLE_RVVM1 "Disable RVVM1" OFF) +option(ENFORCE_RVVM1 "Build fails if RVVM1 is not supported by the compiler" OFF) + +if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM1) + string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS 
"${FLAGS_ENABLE_RVVM1}") + CHECK_C_SOURCE_COMPILES(" + #include + int main() { + vint32m1_t r = __riscv_vmv_v_x_i32m1(1, __riscv_vlenb() * 8 / 32); }" + COMPILER_SUPPORTS_RVVM1) + + if(COMPILER_SUPPORTS_RVVM1) + set(COMPILER_SUPPORTS_RVVM1NOFMA 1) + endif() +endif() + +if (ENFORCE_RVVM1 AND NOT COMPILER_SUPPORTS_RVVM1) + message(FATAL_ERROR "ENFORCE_RVVM1 is specified and that feature is disabled or not supported by the compiler") +endif() + +# RVVM2 + +option(DISABLE_RVVM2 "Disable RVVM2" OFF) +option(ENFORCE_RVVM2 "Build fails if RVVM2 is not supported by the compiler" OFF) + +if(SLEEF_ARCH_RISCV64 AND NOT DISABLE_RVVM2) + string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_RVVM2}") + CHECK_C_SOURCE_COMPILES(" + #include + int main() { + vint32m2_t r = __riscv_vmv_v_x_i32m2(1, 2 * __riscv_vlenb() * 8 / 32); }" + COMPILER_SUPPORTS_RVVM2) + + if(COMPILER_SUPPORTS_RVVM2) + set(COMPILER_SUPPORTS_RVVM2NOFMA 1) + endif() +endif() + +if (ENFORCE_RVVM2 AND NOT COMPILER_SUPPORTS_RVVM2) + message(FATAL_ERROR "ENFORCE_RVVM2 is specified and that feature is disabled or not supported by the compiler") +endif() + # CUDA option(ENFORCE_CUDA "Build fails if CUDA is not supported" OFF) diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index d8b9c845..fb83b84c 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -54,7 +54,7 @@ #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP -#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || CONFIG == 3 +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) || defined(__riscv) || CONFIG == 3 #ifndef FP_FAST_FMA //@#ifndef FP_FAST_FMA #define FP_FAST_FMA diff --git a/src/arch/helperrvv.h b/src/arch/helperrvv.h new file mode 100644 index 00000000..8450e51a --- /dev/null +++ b/src/arch/helperrvv.h @@ -0,0 +1,1067 @@ +#ifndef HELPERRVV_H +#define HELPERRVV_H + +#if !defined(SLEEF_GENHEADER) +#include +#include "misc.h" + +#if defined(VECTLENDP) || defined(VECTLENSP) +#error VECTLENDP or VECTLENSP already defined +#endif +#endif // #if !defined(SLEEF_GENHEADER) + +#if CONFIG == 1 || CONFIG == 2 +#define ISANAME "RISC-V Vector Extension with Min. 
VLEN" +#define SLEEF_RVV_VLEN __riscv_vlenb() +#elif CONFIG == 7 +// 128-bit vector length +#define ISANAME "RISC-V Vector Extension 128-bit" +#define SLEEF_RVV_VLEN ((1 << 7) / 8) +#elif CONFIG == 8 +// 256-bit vector length +#define ISANAME "RISC-V Vector Extension 256-bit" +#define SLEEF_RVV_VLEN ((1 << 8) / 8) +#elif CONFIG == 9 +// 512-bit vector length +#define ISANAME "RISC-V Vector Extension 512-bit" +#define SLEEF_RVV_VLEN ((1 << 9) / 8) +#elif CONFIG == 10 +// 1024-bit vector length +#define ISANAME "RISC-V Vector Extension 1024-bit" +#define SLEEF_RVV_VLEN ((1 << 10) / 8) +#elif CONFIG == 11 +// 2048-bit vector length +#define ISANAME "RISC-V Vector Extension 2048-bit" +#define SLEEF_RVV_VLEN ((1 << 11) / 8) +#else +#error CONFIG macro invalid or not defined +#endif + +#define ENABLE_SP +#define ENABLE_DP + +#if CONFIG != 2 +#define ENABLE_FMA_SP +#define ENABLE_FMA_DP +#endif + +static INLINE int vavailability_i(int name) { return -1; } + +//////////////////////////////////////////////////////////////////////////////// +// RISC-V Vector Types +//////////////////////////////////////////////////////////////////////////////// + +// About the RISC-V Vector type translations: +// +// Because the single- and double-precision versions of the RVV port have +// conflicting definitions of the vmask and vopmask types, they can only +// be defined for at most one precision level in a single translation unit. +// Any functions that use vmask or vopmask types are thus enabled only by the +// corresponding ENABLE_RVV_SP or ENABLE_RVV_DP macro guards. +#if defined(ENABLE_RVV_SP) && defined(ENABLE_RVV_DP) +#error Cannot simultaneously define ENABLE_RVV_SP and ENABLE_RVV_DP +#endif + +#ifdef ENABLE_RVV_SP +// Types that conflict with ENABLE_RVV_DP +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) +typedef vuint64m2_t vmask; +typedef vbool32_t vopmask; +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) +typedef vuint64m4_t vmask; +typedef vbool16_t vopmask; +#else +#error "unknown rvv lmul" +#endif +#endif + +#ifdef ENABLE_RVV_DP +// Types that conflict with ENABLE_RVV_SP +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) +typedef vuint64m1_t vmask; +typedef vbool64_t vopmask; +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) +typedef vuint64m2_t vmask; +typedef vbool32_t vopmask; +#else +#error "unknown rvv lmul" +#endif +#endif + +// LMUL-Dependent Type & Macro Definitions: +// +// Some SLEEF types are multi-value structs. RVV vectors have unknown length at +// compile time, so they cannote appear in a struct in Clang. They are instead +// represented as single vectors with "members" packed into the registers of a +// wide-LMUL register group. In the largest cases (ddi_t and ddf_t), this +// requires LMUL=8 if the base type (vfloat or vdouble) has LMUL=2, meaning +// LMUL=2 is currently the widest option for SLEEF function argument types. 
+#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + +typedef vint32mf2_t vint; +typedef vfloat64m1_t vdouble; +typedef vfloat64m2_t vdouble2; +typedef vfloat64m4_t vdouble3; +typedef vfloat64m4_t dd2; +typedef vuint64m2_t vquad; +typedef vint32m2_t di_t; +typedef vint32m4_t ddi_t; +typedef vfloat32m1_t vfloat; +typedef vfloat32m2_t vfloat2; +typedef vfloat32m4_t df2; +typedef vint32m1_t vint2; +typedef vint32m2_t fi_t; +typedef vint32m4_t dfi_t; +#define SLEEF_RVV_SP_LMUL 1 +#define SLEEF_RVV_DP_LMUL 1 +#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float)) +#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double)) +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m1 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m1 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m2 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m1 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m1 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m1 +#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m1_rm +#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m1 +#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m1_rm +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m1 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I __riscv_vmv_v_x_i32mf2 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ + __riscv_vreinterpret_v_i64m2_i32m2(__riscv_vreinterpret_i64m2(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ + __riscv_vreinterpret_f64m2(__riscv_vreinterpret_v_i32m2_i64m2(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m4 +#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 
+#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32mf2 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m1 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m1 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m1 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32mf2 +#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32mf2_rm +#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m1 +#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m1_rm + +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + +typedef vint32m1_t vint; +typedef vfloat64m2_t vdouble; +typedef vfloat64m4_t vdouble2; +typedef vfloat64m8_t vdouble3; +typedef vfloat64m8_t dd2; +typedef vuint64m4_t vquad; +typedef vint32m4_t di_t; +typedef vint32m8_t ddi_t; +typedef vfloat32m2_t vfloat; +typedef vfloat32m4_t vfloat2; +typedef vfloat32m8_t df2; +typedef vint32m2_t vint2; +typedef vint32m4_t fi_t; +typedef vint32m8_t dfi_t; +#define SLEEF_RVV_SP_LMUL 2 +#define SLEEF_RVV_DP_LMUL 2 +#define VECTLENSP (SLEEF_RVV_SP_LMUL * SLEEF_RVV_VLEN / sizeof(float)) +#define VECTLENDP (SLEEF_RVV_DP_LMUL * SLEEF_RVV_VLEN / sizeof(double)) +#define SLEEF_RVV_SP_VCAST_VF_F __riscv_vfmv_v_f_f32m2 +#define SLEEF_RVV_SP_VCAST_VI2_I __riscv_vmv_v_x_i32m2 +#define SLEEF_RVV_SP_VCAST_VU2_U __riscv_vmv_v_x_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF __riscv_vreinterpret_f32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VF2 __riscv_vreinterpret_f32m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_SP_VREINTERPRET_2VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_SP_VREINTERPRET_4VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VREINTERPRET_VU2 __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_VGET_VI2 __riscv_vget_i32m2 +#define SLEEF_RVV_SP_VGET_2VI __riscv_vget_i32m4 +#define SLEEF_RVV_SP_VGET_VF __riscv_vget_f32m2 +#define SLEEF_RVV_SP_VGET_VF2 __riscv_vget_f32m4 +#define SLEEF_RVV_SP_VGET_4VF __riscv_vget_f32m8 +#define SLEEF_RVV_SP_VGET_VU2 __riscv_vget_u32m4 +#define SLEEF_RVV_SP_LOAD_VF __riscv_vle32_v_f32m2 +#define SLEEF_RVV_SP_LOAD_VI2 __riscv_vle32_v_i32m2 +#define SLEEF_RVV_SP_VCAST_VM_U __riscv_vmv_v_x_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VM __riscv_vreinterpret_u64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m4 +#define SLEEF_RVV_SP_VREINTERPRET_VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_SP_LOAD_VI __riscv_vle32_v_i32m2 +#define SLEEF_RVV_SP_VFNCVT_X_F_VI __riscv_vfcvt_x_f_v_i32m2_rm +#define SLEEF_RVV_SP_VFCVT_F_X_VF __riscv_vfcvt_f_x_v_f32m2 +#define SLEEF_RVV_SP_VFCVT_X_F_VF_RM __riscv_vfcvt_x_f_v_i32m2_rm +#define SLEEF_RVV_DP_VCAST_VD_D __riscv_vfmv_v_f_f64m2 +#define SLEEF_RVV_DP_VCAST_VD_VI(x) __riscv_vfwcvt_f(x, VECTLENDP) +#define SLEEF_RVV_DP_VCAST_VI_I 
__riscv_vmv_v_x_i32m1 +#define SLEEF_RVV_DP_VCAST_VM_U __riscv_vmv_v_x_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD __riscv_vreinterpret_f64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VD2 __riscv_vreinterpret_f64m4 +#define SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(x) \ + __riscv_vreinterpret_v_i64m4_i32m4(__riscv_vreinterpret_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(x) \ + __riscv_vreinterpret_f64m4(__riscv_vreinterpret_v_i32m4_i64m4(x)) +#define SLEEF_RVV_DP_VREINTERPRET_4VD __riscv_vreinterpret_f64m8 +#define SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(x) \ + __riscv_vreinterpret_f64m8(__riscv_vreinterpret_v_i32m8_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(x) \ + __riscv_vreinterpret_v_i64m8_i32m8(__riscv_vreinterpret_i64m8(x)) +#define SLEEF_RVV_DP_VREINTERPRET_VM __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI64 __riscv_vreinterpret_i64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VU64 __riscv_vreinterpret_u64m2 +#define SLEEF_RVV_DP_VREINTERPRET_VI __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_VI2 __riscv_vreinterpret_i32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VI __riscv_vreinterpret_i32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VI __riscv_vreinterpret_i32m4 +#define SLEEF_RVV_DP_VREINTERPRET_8VI __riscv_vreinterpret_i32m8 +#define SLEEF_RVV_DP_VREINTERPRET_VU __riscv_vreinterpret_u32m1 +#define SLEEF_RVV_DP_VREINTERPRET_2VU __riscv_vreinterpret_u32m2 +#define SLEEF_RVV_DP_VREINTERPRET_4VU __riscv_vreinterpret_u32m4 +#define SLEEF_RVV_DP_VGET_VM __riscv_vget_u64m2 +#define SLEEF_RVV_DP_VGET_VD __riscv_vget_f64m2 +#define SLEEF_RVV_DP_VGET_VD2 __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_4VD __riscv_vget_f64m4 +#define SLEEF_RVV_DP_VGET_VI __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_VI2 __riscv_vget_i32m1 +#define SLEEF_RVV_DP_VGET_2VI __riscv_vget_i32m2 +#define SLEEF_RVV_DP_VGET_4VI __riscv_vget_i32m4 +#define SLEEF_RVV_DP_VGET_8VI __riscv_vget_i32m8 +#define SLEEF_RVV_DP_VGET_VU __riscv_vget_u32m1 +#define SLEEF_RVV_DP_LOAD_VD __riscv_vle64_v_f64m2 +#define SLEEF_RVV_DP_LOAD_VI __riscv_vle32_v_i32m1 +#define SLEEF_RVV_DP_VFNCVT_X_F_VI __riscv_vfncvt_x_f_w_i32m1_rm +#define SLEEF_RVV_DP_VFCVT_F_X_VD __riscv_vfcvt_f_x_v_f64m2 +#define SLEEF_RVV_DP_VFCVT_X_F_VD_RM __riscv_vfcvt_x_f_v_i64m2_rm + +#else +#error "unknown rvv lmul" +#endif // ENABLE_RVVM1 + +//////////////////////////////////////////////////////////////////////////////// +// Single-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// fi type +static INLINE vfloat figetd_vf_di(fi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF(SLEEF_RVV_SP_VGET_VI2(d, 0)); +} +static INLINE vint2 figeti_vi2_di(fi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 1); +} +static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { + fi_t res; + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_VI2(d)); + res = __riscv_vset(res, 1, i); + return res; +} +static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VREINTERPRET_VF2(SLEEF_RVV_SP_VGET_2VI(d, 0)); +} +static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { + return SLEEF_RVV_SP_VGET_VI2(d, 2); +} +static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { + dfi_t res; + res = __riscv_vset(res, 0, SLEEF_RVV_SP_VREINTERPRET_2VI(v)); + res = __riscv_vset(res, 2, i); + return res; +} +static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { + return __riscv_vset(dfi, 0, 
SLEEF_RVV_SP_VREINTERPRET_2VI(v)); +} +// vfloat2 type +static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 0); +} +static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { + return SLEEF_RVV_SP_VGET_VF(v, 1); +} +static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { + vfloat2 res; + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + return res; +} +static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { + return __riscv_vset(v, 0, d); +} +static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { + return __riscv_vset(v, 1, d); +} +// df2 type +static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { + df2 res; + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); + return res; +} +static vfloat2 df2geta_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 0); } +static vfloat2 df2getb_vf2_df2(df2 d) { return SLEEF_RVV_SP_VGET_VF2(d, 1); } +static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(vf); +} +static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { + return SLEEF_RVV_SP_VREINTERPRET_VF(vi); +} + + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vfloat vcast_vf_f(float f) { + return SLEEF_RVV_SP_VCAST_VF_F(f, VECTLENSP); +} +static INLINE vfloat vrint_vf_vf(vfloat vd) { + return SLEEF_RVV_SP_VFCVT_F_X_VF(SLEEF_RVV_SP_VFCVT_X_F_VF_RM(vd, __RISCV_FRM_RNE, VECTLENSP), VECTLENSP); +} +static INLINE vfloat vcast_vf_vi2(vint2 vi) { + return __riscv_vfcvt_f(vi, VECTLENSP); +} +static INLINE vint2 vcast_vi2_i(int i) { + return SLEEF_RVV_SP_VCAST_VI2_I(i, VECTLENSP); +} +static INLINE vint2 vrint_vi2_vf(vfloat vf) { + return SLEEF_RVV_SP_VFNCVT_X_F_VI(vf, __RISCV_FRM_RNE, VECTLENSP); +} +static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { + return __riscv_vfcvt_rtz_x(vf, VECTLENSP); +} +static INLINE vfloat vtruncate_vf_vf(vfloat vf) { + return vcast_vf_vi2(vtruncate_vi2_vf(vf)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vfloat vload_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE vfloat vloadu_vf_p(const float *ptr) { + return SLEEF_RVV_SP_LOAD_VF(ptr, VECTLENSP); +} +static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { + __riscv_vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { + __riscv_vse32(ptr, v, VECTLENSP); +} +static INLINE void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { + __riscv_vse32(ptr, v, VECTLENSP); +} +static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { + return __riscv_vluxei32(ptr, __riscv_vmul(SLEEF_RVV_SP_VREINTERPRET_VU(vi2), sizeof(float), VECTLENSP), VECTLENSP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfadd(x, y, VECTLENSP); +} +static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfsub(x, y, VECTLENSP); +} +static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfmul(x, y, VECTLENSP); +} +static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfdiv(x, y, VECTLENSP); +} +static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfmax(x, y, VECTLENSP); +} +static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfmin(x, y, VECTLENSP); +} 
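+// Editor's illustrative sketch (not part of the patch): unlike fixed-width
+// SIMD helpers, every wrapper here threads the active element count
+// (VECTLENSP) through the underlying intrinsic, so kernels compose like
+// plain scalar arithmetic. The helper name axpb_vf is hypothetical;
+// vcast_vf_f is defined above and vmla_vf_vf_vf_vf just below.
+//
+//   static INLINE vfloat axpb_vf(vfloat a, vfloat x, float b) {
+//     return vmla_vf_vf_vf_vf(a, x, vcast_vf_f(b)); // a*x + b, whole group
+//   }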
+static INLINE vfloat vrec_vf_vf(vfloat d) { + return __riscv_vfdiv(vcast_vf_f(1.0f), d, VECTLENSP); +} +static INLINE vfloat vsqrt_vf_vf(vfloat d) { + return __riscv_vfsqrt(d, VECTLENSP); +} +#if defined(ENABLE_FMA_SP) +// Multiply accumulate: z = z + x * y +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return __riscv_vfmadd(x, y, z, VECTLENSP); +} +// Multiply subtract: z = z - x * y +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return __riscv_vfnmsub(x, y, z, VECTLENSP); +} +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { + return __riscv_vfmsub(x, y, z, VECTLENSP); +} +#else +static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } +static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } +#endif +// fused multiply add / sub +static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) + z + return __riscv_vfmadd(x, y, z, VECTLENSP); +} +static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // -(x * y) + z + return __riscv_vfnmsub(x, y, z, VECTLENSP); +} +static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // (x * y) - z + return __riscv_vfmsub(x, y, z, VECTLENSP); +} +// sign manipulation +static INLINE vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfsgnjx(x, y, VECTLENSP); +} +static INLINE vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { + return __riscv_vfsgnj(x, y, VECTLENSP); +} +static INLINE vfloat vsign_vf_vf(vfloat f) { + return __riscv_vfsgnj(SLEEF_RVV_SP_VCAST_VF_F(1.0f, VECTLENSP), f, VECTLENSP); +} +static INLINE vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) { + vint2 xi = SLEEF_RVV_SP_VREINTERPRET_VI2(x); + vint2 yi = SLEEF_RVV_SP_VREINTERPRET_VI2(y); + vint2 xioryi = __riscv_vor(xi, yi, VECTLENSP); + vfloat xory = SLEEF_RVV_SP_VREINTERPRET_VF(xioryi); + return __riscv_vfsgnj(x, xory, VECTLENSP); +} +static INLINE vfloat vabs_vf_vf(vfloat f) { + return __riscv_vfabs(f, VECTLENSP); +} +static INLINE vfloat vneg_vf_vf(vfloat f) { + return __riscv_vfneg(f, VECTLENSP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vadd(x, y, VECTLENSP); +} +static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vsub(x, y, VECTLENSP); +} +static INLINE vint2 vneg_vi2_vi2(vint2 x) { + return __riscv_vneg(x, VECTLENSP); +} +static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vand(x, y, VECTLENSP); +} +static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vand(__riscv_vnot(x, VECTLENSP), y, VECTLENSP); +} +static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vor(x, y, VECTLENSP); +} +static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vxor(x, y, VECTLENSP); +} +static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { + return __riscv_vsll(x, c, VECTLENSP); +} +static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { + return __riscv_vsra(x, c, VECTLENSP); +} +static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { + return SLEEF_RVV_SP_VREINTERPRET_VI2(__riscv_vsrl(SLEEF_RVV_SP_VREINTERPRET_VU2(x), c, VECTLENSP)); +} + +#ifdef ENABLE_RVV_SP 
+/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vfloat vreinterpret_vf_vm(vmask vm) { + return SLEEF_RVV_SP_VREINTERPRET_VF(__riscv_vncvt_x(vm, VECTLENSP)); +} +static INLINE vmask vreinterpret_vm_vf(vfloat vf) { + return __riscv_vwcvtu_x(SLEEF_RVV_SP_VREINTERPRET_VU(vf), VECTLENSP); +} +static INLINE int vtestallones_i_vo32(vopmask g) { + return __riscv_vcpop(g, VECTLENSP) == VECTLENSP; +} +static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) { + return SLEEF_RVV_SP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENSP); +} +static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { + return __riscv_vand(x, y, VECTLENSP); +} +static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { + return __riscv_vor(x, y, VECTLENSP); +} +static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { + return __riscv_vxor(x, y, VECTLENSP); +} +static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { + return __riscv_vand(SLEEF_RVV_SP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_SP_VREINTERPRET_VI64(x), VECTLENSP)), y, VECTLENSP); +} +static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { + return __riscv_vmerge(y, -1, x, VECTLENSP); +} +static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); +} +static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { + return __riscv_vmerge(y, 0, x, VECTLENSP); +} + + +/****************************************/ +/* Logical Mask Operations */ +/****************************************/ +static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmand(x, y, VECTLENSP); +} +static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmandn(y, x, VECTLENSP); +} +static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmor(x, y, VECTLENSP); +} +static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { + return __riscv_vmxor(x, y, VECTLENSP); +} +// single precision FP comparison +static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfeq(x, y, VECTLENSP); +} +static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfne(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfgt(x, y, VECTLENSP); +} +static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfge(x, y, VECTLENSP); +} +static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmflt(x, y, VECTLENSP); +} +static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { + return __riscv_vmfle(x, y, VECTLENSP); +} +static INLINE vopmask visnan_vo_vf(vfloat d) { + return __riscv_vmfne(d, d, VECTLENSP); +} +static INLINE vopmask visinf_vo_vf(vfloat d) { + return __riscv_vmfeq(__riscv_vfabs(d, VECTLENSP), SLEEF_INFINITYf, VECTLENSP); +} +static INLINE vopmask vispinf_vo_vf(vfloat d) { + return __riscv_vmfeq(d, SLEEF_INFINITYf, VECTLENSP); +} +// conditional select +static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { + return __riscv_vmerge(y, x, mask, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_f_f(vopmask mask, float v1, float v0) { + return __riscv_vfmerge(vcast_vf_f(v0), v1, mask, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { + return __riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d2), d1, o1, VECTLENSP), d0, o0, VECTLENSP); +} +static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, 
float d1, float d2, float d3) { + return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vf_f(d3), d2, o2, VECTLENSP), d1, o1, VECTLENSP), d0, o0, VECTLENSP); +} +// integer comparison +static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vmseq(x, y, VECTLENSP); +} +static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { + return __riscv_vmsgt(x, y, VECTLENSP); +} +static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { + vint2 zero = vcast_vi2_i(0); + return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENSP), VECTLENSP); +} +// integer conditional select +static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { + return __riscv_vmerge(y, x, m, VECTLENSP); +} +static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { + return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENSP), VECTLENSP); +} +#endif // ENABLE_RVV_SP + + +//////////////////////////////////////////////////////////////////////////////// +// Double-Precision Functions +//////////////////////////////////////////////////////////////////////////////// + +/****************************************/ +/* Multi-value and multi-word types */ +/****************************************/ +// vdouble2 type +static INLINE const vdouble vd2getx_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 0); +} +static INLINE const vdouble vd2gety_vd_vd2(vdouble2 v) { + return SLEEF_RVV_DP_VGET_VD(v, 1); +} +static INLINE const vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { + vdouble2 res; + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + return res; +} +static INLINE const vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { + return __riscv_vset(v, 0, d); +} +static INLINE const vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { + return __riscv_vset(v, 1, d); +} +// dd2 type +static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { + dd2 res; + res = __riscv_vset(res, 0, a); + res = __riscv_vset(res, 1, b); + return res; +} +static vdouble2 dd2geta_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 0); } +static vdouble2 dd2getb_vd2_dd2(dd2 d) { return SLEEF_RVV_DP_VGET_4VD(d, 1); } +// vdouble3 type +static INLINE vdouble vd3getx_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 0); } +static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 1); } +static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return SLEEF_RVV_DP_VGET_VD(v, 2); } +static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + vdouble3 res; + res = __riscv_vset(res, 0, x); + res = __riscv_vset(res, 1, y); + res = __riscv_vset(res, 2, z); + return res; +} +static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 0, d); } +static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 1, d); } +static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return __riscv_vset(v, 2, d); } +// di type +static INLINE vdouble digetd_vd_di(di_t d) { + return SLEEF_RVV_DP_VGET_VD(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(d), 0); +} +static INLINE vint digeti_vi_di(di_t d) { +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 1)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + return SLEEF_RVV_DP_VGET_VI(d, 2); +#else +#error "unknown rvv lmul" +#endif +} +static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { + di_t res; + res = SLEEF_RVV_DP_VREINTERPRET_4VI_VD2(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_VD2_4VI(res), 0, d)); +#if 
defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + res = __riscv_vset(res, 1, __riscv_vlmul_ext_i32m1(i)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + res = __riscv_vset(res, 2, i); +#else +#error "unknown rvv lmul" +#endif + return res; +} +// ddi type +static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { + return SLEEF_RVV_DP_VGET_VD2(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(d), 0); +} +static INLINE vint ddigeti_vi_ddi(ddi_t d) { +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + return __riscv_vlmul_trunc_i32mf2(SLEEF_RVV_DP_VGET_VI(d, 2)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + return SLEEF_RVV_DP_VGET_VI(d, 4); +#else +#error "unknown rvv lmul" +#endif +} +static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { + ddi_t res; + res = SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(res), 0, v)); +#if defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) + res = __riscv_vset(res, 2, __riscv_vlmul_ext_i32m1(i)); +#elif defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) + res = __riscv_vset(res, 4, i); +#else +#error "unknown rvv lmul" +#endif + return res; +} +static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { + return SLEEF_RVV_DP_VREINTERPRET_8VI_4VD(__riscv_vset(SLEEF_RVV_DP_VREINTERPRET_4VD_8VI(ddi), 0, v)); +} + +/****************************************/ +/* Type Conversions and Broadcasts */ +/****************************************/ +static INLINE vdouble vcast_vd_d(double d) { + return SLEEF_RVV_DP_VCAST_VD_D(d, VECTLENDP); +} +static INLINE vdouble vcast_vd_vi(vint i) { + return SLEEF_RVV_DP_VCAST_VD_VI(i); +} +static INLINE vint vcast_vi_i(int32_t i) { + return SLEEF_RVV_DP_VCAST_VI_I(i, VECTLENDP); +} +static INLINE vint vrint_vi_vd(vdouble vd) { + return SLEEF_RVV_DP_VFNCVT_X_F_VI(vd, __RISCV_FRM_RNE, VECTLENDP); +} +static INLINE vdouble vrint_vd_vd(vdouble vd) { + return SLEEF_RVV_DP_VFCVT_F_X_VD(SLEEF_RVV_DP_VFCVT_X_F_VD_RM(vd, __RISCV_FRM_RNE, VECTLENDP), VECTLENDP); +} +static INLINE vint vtruncate_vi_vd(vdouble vd) { + return __riscv_vfncvt_rtz_x(vd, VECTLENDP); +} +static INLINE vdouble vtruncate_vd_vd(vdouble vd) { + return vcast_vd_vi(vtruncate_vi_vd(vd)); +} + + +/****************************************/ +/* Memory Operations */ +/****************************************/ +static INLINE vdouble vload_vd_p(const double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vdouble vloadu_vd_p(const double *ptr) { + return SLEEF_RVV_DP_LOAD_VD(ptr, VECTLENDP); +} +static INLINE vint vloadu_vi_p(int32_t *p) { + return SLEEF_RVV_DP_LOAD_VI(p, VECTLENDP); +} +static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { + __riscv_vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { + __riscv_vse64(ptr, v, VECTLENDP); +} +static INLINE void vstoreu_v_p_vi(int32_t *ptr, vint v) { + __riscv_vse32(ptr, v, VECTLENDP); +} +static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { + return __riscv_vluxei64(ptr, __riscv_vwmulu(SLEEF_RVV_DP_VREINTERPRET_VU(vi), sizeof(double), VECTLENDP), VECTLENDP); +} + + +/****************************************/ +/* Floating-Point Arithmetic */ +/****************************************/ +static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfadd(x, y, VECTLENDP); +} +static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsub(x, y, VECTLENDP); +} +static INLINE vdouble vrec_vd_vd(vdouble d) { + return __riscv_vfdiv(vcast_vd_d(1.0), d, 
VECTLENDP); +} +static INLINE vdouble vabs_vd_vd(vdouble d) { + return __riscv_vfabs(d, VECTLENDP); +} +static INLINE vdouble vsqrt_vd_vd(vdouble d) { + return __riscv_vfsqrt(d, VECTLENDP); +} +static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfmul(x, y, VECTLENDP); +} +static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfdiv(x, y, VECTLENDP); +} +static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfmax(x, y, VECTLENDP); +} +static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfmin(x, y, VECTLENDP); +} +#if defined(ENABLE_FMA_DP) +// Multiply accumulate: z = z + x * y +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmadd(x, y, z, VECTLENDP); +} +// Multiply subtract: z = z - x * y +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmsub(x, y, z, VECTLENDP); +} +#else +static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } +#endif +// fused multiply add / sub +static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmadd(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfnmsub(x, y, z, VECTLENDP); +} +static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { + return __riscv_vfmsub(x, y, z, VECTLENDP); +} +// sign manipulation +static INLINE vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsgnjx(x, y, VECTLENDP); +} +static INLINE vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsgnj(x, y, VECTLENDP); +} +static INLINE vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { + return __riscv_vfsgnj(x, SLEEF_RVV_DP_VREINTERPRET_VD(__riscv_vor(SLEEF_RVV_DP_VREINTERPRET_VM(x), SLEEF_RVV_DP_VREINTERPRET_VM(y), VECTLENDP)), VECTLENDP); +} +static INLINE vdouble vneg_vd_vd(vdouble d) { + return __riscv_vfneg(d, VECTLENDP); +} + + +/****************************************/ +/* Integer Arithmetic and Logic */ +/****************************************/ +static INLINE vint vadd_vi_vi_vi(vint x, vint y) { + return __riscv_vadd(x, y, VECTLENDP); +} +static INLINE vint vsub_vi_vi_vi(vint x, vint y) { + return __riscv_vsub(x, y, VECTLENDP); +} +static INLINE vint vneg_vi_vi(vint x) { + return __riscv_vneg(x, VECTLENDP); +} +static INLINE vint vand_vi_vi_vi(vint x, vint y) { + return __riscv_vand(x, y, VECTLENDP); +} +static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { + return __riscv_vand(__riscv_vnot(x, VECTLENDP), y, VECTLENDP); +} +static INLINE vint vor_vi_vi_vi(vint x, vint y) { + return __riscv_vor(x, y, VECTLENDP); +} +static INLINE vint vxor_vi_vi_vi(vint x, vint y) { + return __riscv_vxor(x, y, VECTLENDP); +} +static INLINE vint vsll_vi_vi_i(vint x, int c) { + return __riscv_vsll(x, c, VECTLENDP); +} +static INLINE vint vsra_vi_vi_i(vint x, int c) { + return __riscv_vsra(x, c, VECTLENDP); +} +static INLINE vint vsrl_vi_vi_i(vint x, int c) { + return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vsrl(SLEEF_RVV_DP_VREINTERPRET_VU(x), c, VECTLENDP)); +} + + +#ifdef ENABLE_RVV_DP +/****************************************/ +/* Bitmask Operations */ +/****************************************/ +static INLINE vmask vcast_vm_i64(int64_t c) { + return 
SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP);
+}
+static INLINE vmask vcast_vm_u64(uint64_t c) {
+  return SLEEF_RVV_DP_VCAST_VM_U(c, VECTLENDP);
+}
+static INLINE vmask vcast_vm_i_i(int64_t h, int64_t l) {
+  return SLEEF_RVV_DP_VCAST_VM_U((((uint64_t)h) << 32) | (uint32_t) l, VECTLENDP);
+}
+static INLINE vmask vcast_vm_vi(vint vi) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP));
+}
+static INLINE vmask vcastu_vm_vi(vint vi) {
+  return __riscv_vsll(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vwcvt_x(vi, VECTLENDP)), 32, VECTLENDP);
+}
+static INLINE vint vcastu_vi_vm(vmask vm) {
+  return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vnsrl(vm, 32, VECTLENDP));
+}
+static INLINE vint vcast_vi_vm(vmask vm) {
+  return SLEEF_RVV_DP_VREINTERPRET_VI(__riscv_vncvt_x(vm, VECTLENDP));
+}
+static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) {
+  return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP);
+}
+static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vand(x, y, VECTLENDP);
+}
+static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vor(x, y, VECTLENDP);
+}
+static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vxor(x, y, VECTLENDP);
+}
+static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vand(SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vnot(SLEEF_RVV_DP_VREINTERPRET_VI64(x), VECTLENDP)), y, VECTLENDP);
+}
+static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) {
+  return __riscv_vmerge(y, 0, x, VECTLENDP);
+}
+static INLINE vmask vsll64_vm_vm_i(vmask mask, int64_t c) {
+  return __riscv_vsll(mask, c, VECTLENDP);
+}
+static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vsub(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP));
+}
+static INLINE vmask vsrl64_vm_vm_i(vmask mask, int64_t c) {
+  return __riscv_vsrl(mask, c, VECTLENDP);
+}
+static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) {
+  return __riscv_vadd(x, y, VECTLENDP);
+}
+static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) {
+  return __riscv_vmerge(y, -1, x, VECTLENDP);
+}
+static INLINE vmask vsel_vm_vo64_vm_vm(vopmask mask, vmask x, vmask y) {
+  return __riscv_vmerge(y, x, mask, VECTLENDP);
+}
+static INLINE vmask vneg64_vm_vm(vmask mask) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(__riscv_vneg(SLEEF_RVV_DP_VREINTERPRET_VI64(mask), VECTLENDP));
+}
+static INLINE vdouble vreinterpret_vd_vm(vmask vm) {
+  return SLEEF_RVV_DP_VREINTERPRET_VD(vm);
+}
+static INLINE vmask vreinterpret_vm_vd(vdouble vd) {
+  return SLEEF_RVV_DP_VREINTERPRET_VM(vd);
+}
+
+// vquad type
+static INLINE const vmask vqgetx_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 0); }
+static INLINE const vmask vqgety_vm_vq(vquad v) { return SLEEF_RVV_DP_VGET_VM(v, 1); }
+static INLINE vquad vqsetxy_vq_vm_vm(vmask x, vmask y) {
+  vquad res;
+  res = __riscv_vset(res, 0, x);
+  res = __riscv_vset(res, 1, y);
+  return res;
+}
+static INLINE vquad vqsetx_vq_vq_vm(vquad v, vmask x) { return __riscv_vset(v, 0, x); }
+static INLINE vquad vqsety_vq_vq_vm(vquad v, vmask y) { return __riscv_vset(v, 1, y); }
+
+
+
+/****************************************/
+/* Logical Mask Operations */
+/****************************************/
+static INLINE vopmask vcast_vo64_vo32(vopmask vo) {
+  return vo;
+}
+static INLINE vopmask vcast_vo32_vo64(vopmask vo) {
+  return vo;
+}
+static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmand(x, y, VECTLENDP);
+}
+static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmandn(y, x, VECTLENDP);
+}
+static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmor(x, y, VECTLENDP);
+}
+static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) {
+  return __riscv_vmxor(x, y, VECTLENDP);
+}
+static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) {
+  return __riscv_vmseq(x, y, VECTLENDP);
+}
+static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
+  return __riscv_vmsgt(SLEEF_RVV_DP_VREINTERPRET_VI64(x), SLEEF_RVV_DP_VREINTERPRET_VI64(y), VECTLENDP);
+}
+// double-precision comparison
+static INLINE vopmask visinf_vo_vd(vdouble d) {
+  return __riscv_vmfeq(__riscv_vfabs(d, VECTLENDP), SLEEF_INFINITY, VECTLENDP);
+}
+static INLINE vopmask vispinf_vo_vd(vdouble d) {
+  return __riscv_vmfeq(d, SLEEF_INFINITY, VECTLENDP);
+}
+static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfeq(x, y, VECTLENDP);
+}
+static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfne(x, y, VECTLENDP);
+}
+static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmflt(x, y, VECTLENDP);
+}
+static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfle(x, y, VECTLENDP);
+}
+static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfgt(x, y, VECTLENDP);
+}
+static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) {
+  return __riscv_vmfge(x, y, VECTLENDP);
+}
+static INLINE vopmask visnan_vo_vd(vdouble d) {
+  return __riscv_vmfne(d, d, VECTLENDP);
+}
+// double-precision conditional select
+static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) {
+  return __riscv_vmerge(y, x, mask, VECTLENDP);
+}
+static INLINE vdouble vsel_vd_vo_d_d(vopmask mask, double v0, double v1) {
+  return __riscv_vfmerge(vcast_vd_d(v1), v0, mask, VECTLENDP);
+}
+static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
+  return __riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d2), d1, o1, VECTLENDP), d0, o0, VECTLENDP);
+}
+static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
+  return __riscv_vfmerge(__riscv_vfmerge(__riscv_vfmerge(vcast_vd_d(d3), d2, o2, VECTLENDP), d1, o1, VECTLENDP), d0, o0, VECTLENDP);
+}
+static INLINE int vtestallones_i_vo64(vopmask g) {
+  return __riscv_vcpop(g, VECTLENDP) == VECTLENDP;
+}
+// integer comparison
+static INLINE vopmask veq_vo_vi_vi(vint x, vint y) {
+  return __riscv_vmseq(x, y, VECTLENDP);
+}
+static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) {
+  return __riscv_vmsgt(x, y, VECTLENDP);
+}
+static INLINE vint vgt_vi_vi_vi(vint x, vint y) {
+  vint zero = vcast_vi_i(0);
+  return __riscv_vmerge(zero, -1, __riscv_vmsgt(x, y, VECTLENDP), VECTLENDP);
+}
+// integer conditional select
+static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) {
+  return __riscv_vmerge(y, x, m, VECTLENDP);
+}
+static INLINE vint vandnot_vi_vo_vi(vopmask mask, vint vi) {
+  return __riscv_vmerge(vi, 0, mask, VECTLENDP);
+}
+static INLINE vint vand_vi_vo_vi(vopmask x, vint y) {
+  return __riscv_vmerge(y, 0, __riscv_vmnot(x, VECTLENDP), VECTLENDP);
+}
+#endif // ENABLE_RVV_DP
+
+#endif // HELPERRVV_H
diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h
index 2f1a0da9..274156fc 100644
--- a/src/common/commonfuncs.h
+++ b/src/common/commonfuncs.h
@@ -3,7 +3,7 @@
 // (See accompanying file LICENSE.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 
 typedef struct {
   vdouble x, y, z;
 } vdouble3;
 
@@ -210,14 +210,17 @@ static INLINE CONST VECTOR_CC vdouble vtoward0_vd_vd(vdouble x) { // returns nex
   return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t);
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) {
   return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y)));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) {
   return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d);
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) {
   return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y)));
 }
@@ -226,6 +229,7 @@ static INLINE CONST VECTOR_CC vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) {
   return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)),
                                           vand_vm_vm_vm   (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y))));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vdouble vtruncate2_vd_vd(vdouble x) {
 #ifdef FULL_FP_ROUNDING
diff --git a/src/common/dd.h b/src/common/dd.h
index b1423556..3431e42d 100644
--- a/src/common/dd.h
+++ b/src/common/dd.h
@@ -3,7 +3,7 @@
 // (See accompanying file LICENSE.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 #if !defined(ENABLE_CUDA)
 typedef struct {
   vdouble x, y;
diff --git a/src/common/df.h b/src/common/df.h
index 4e3e7949..a14c1c6a 100644
--- a/src/common/df.h
+++ b/src/common/df.h
@@ -3,7 +3,7 @@
 // (See accompanying file LICENSE.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 #if !defined(ENABLE_CUDA)
 typedef struct {
   vfloat x, y;
diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt
index 69f2ba6e..41d6f36f 100644
--- a/src/libm-tester/CMakeLists.txt
+++ b/src/libm-tester/CMakeLists.txt
@@ -27,6 +27,11 @@ set(TESTER3_DEFINITIONS_VXENOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SL
 set(TESTER3_DEFINITIONS_VXE2 ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2)
 set(TESTER3_DEFINITIONS_VXE2NOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vxe2nofma)
 
+set(TESTER3_DEFINITIONS_RVVM1 ATR=finz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1 ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM1NOFMA ATR=cinz_ DPTYPE=vfloat64m1_t SPTYPE=vfloat32m1_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm1nofma ENABLE_RVVM1)
+set(TESTER3_DEFINITIONS_RVVM2 ATR=finz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2 ENABLE_RVVM2)
+set(TESTER3_DEFINITIONS_RVVM2NOFMA ATR=cinz_ DPTYPE=vfloat64m2_t SPTYPE=vfloat32m2_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=rvvm2nofma ENABLE_RVVM2)
+
 set(TESTER3_DEFINITIONS_PUREC_SCALAR ATR=cinz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purec)
 set(TESTER3_DEFINITIONS_PURECFMA_SCALAR ATR=finz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purecfma)
 
@@ -47,6 +52,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
   set(TEST3_CINZ purec_scalar vxenofma vxe2nofma)
   set(TEST3_FINZ purecfma_scalar vxe vxe2)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
+  set(TEST3_CINZ purec_scalar rvvm1nofma rvvm2nofma)
+  set(TEST3_FINZ purecfma_scalar rvvm1 rvvm2)
 endif()
 
 #
diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c
index 002cb0f1..03fcd743 100644
--- a/src/libm-tester/iutsimd.c
+++ b/src/libm-tester/iutsimd.c
@@ -343,6 +343,30 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
 typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #endif
 
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#include "helperrvv.h"
+#include "renamervvm1.h"
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#include "helperrvv.h"
+#include "renamervvm2.h"
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #include "renamepurec_scalar.h"
 #if !defined(USE_INLINE_HEADER)
@@ -426,12 +450,12 @@ int check_feature(double d, float f) {
   return 0;
 }
 
-#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER))
+#if defined(ENABLE_DP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER))
 static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
 static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
 #endif
 
-#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER))
+#if defined(ENABLE_SP) && !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA) || defined(USE_INLINE_HEADER))
 static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; }
 static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester2simddp.c b/src/libm-tester/tester2simddp.c
index 540d1142..5071bb70 100644
--- a/src/libm-tester/tester2simddp.c
+++ b/src/libm-tester/tester2simddp.c
@@ -191,6 +191,38 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
 typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #endif
 
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm1.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm2.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -209,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
 static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester2simdsp.c b/src/libm-tester/tester2simdsp.c
index d140ba4b..3fb1e619 100644
--- a/src/libm-tester/tester2simdsp.c
+++ b/src/libm-tester/tester2simdsp.c
@@ -191,6 +191,38 @@ typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
 typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
 #endif
 
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm1.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm1nofma.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm2.h"
+#include "sleef.h"
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#include "renamervvm2nofma.h"
+#include "sleef.h"
+#endif
+
 #ifdef ENABLE_PUREC_SCALAR
 #define CONFIG 1
 #include "helperpurec_scalar.h"
@@ -209,7 +241,7 @@ typedef Sleef_float_2 vfloat2;
 
 //
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; }
 static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; }
 #endif
diff --git a/src/libm-tester/tester3.c b/src/libm-tester/tester3.c
index f4b27e56..a55404ed 100644
--- a/src/libm-tester/tester3.c
+++ b/src/libm-tester/tester3.c
@@ -101,6 +101,44 @@ static INLINE __attribute__((vector_size(16))) float setSLEEF_VECTOR_FLOAT(float
 static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float v, int r) { return unifyValuef(v[r & 3]); }
 #endif
 
+#if __riscv && __riscv_v
+
+#if defined(ENABLE_RVVM1)
+#define VECTLENSP (1 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (1 * __riscv_vlenb() / sizeof(double))
+
+static INLINE vfloat32m1_t setvfloat32m1_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m1(a, VECTLENSP); }
+static INLINE float getvfloat32m1_t(vfloat32m1_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m1_t setvfloat64m1_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m1(a, VECTLENDP); }
+static INLINE double getvfloat64m1_t(vfloat64m1_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m1_t vf2getx_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 0); }
+static vfloat32m1_t vf2gety_vf_vf2(vfloat32m2_t v) { return __riscv_vget_f32m1(v, 1); }
+static vfloat64m1_t vd2getx_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 0); }
+static vfloat64m1_t vd2gety_vd_vd2(vfloat64m2_t v) { return __riscv_vget_f64m1(v, 1); }
+
+#elif defined(ENABLE_RVVM2)
+#define VECTLENSP (2 * __riscv_vlenb() / sizeof(float))
+#define VECTLENDP (2 * __riscv_vlenb() / sizeof(double))
+
+static INLINE vfloat32m2_t setvfloat32m2_t(float d, int r) { float a[VECTLENSP]; memrand(a, sizeof(a)); a[r & (VECTLENSP-1)] = d; return __riscv_vle32_v_f32m2(a, VECTLENSP); }
+static INLINE float getvfloat32m2_t(vfloat32m2_t v, int r) { float a[VECTLENSP]; __riscv_vse32(a, v, VECTLENSP); return unifyValuef(a[r & (VECTLENSP-1)]); }
+static INLINE vfloat64m2_t setvfloat64m2_t(double d, int r) { double a[VECTLENDP]; memrand(a, sizeof(a)); a[r & (VECTLENDP-1)] = d; return __riscv_vle64_v_f64m2(a, VECTLENDP); }
+static INLINE double getvfloat64m2_t(vfloat64m2_t v, int r) { double a[VECTLENDP]; __riscv_vse64(a, v, VECTLENDP); return unifyValue(a[r & (VECTLENDP-1)]); }
+
+static vfloat32m2_t vf2getx_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 0); }
+static vfloat32m2_t vf2gety_vf_vf2(vfloat32m4_t v) { return __riscv_vget_f32m2(v, 1); }
+static vfloat64m2_t vd2getx_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 0); }
+static vfloat64m2_t vd2gety_vd_vd2(vfloat64m4_t v) { return __riscv_vget_f64m2(v, 1); }
+
+#else
+#error "unknown RVV"
+#endif
+
+#undef VECTLENSP
+#undef VECTLENDP
+#endif
+
 //
 // ATR = cinz_, NAME = sin, TYPE = d2, ULP = u35, EXT = sse2
@@ -110,7 +148,7 @@ static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float
 #define SET(TYPE) set ## TYPE
 #define GET(TYPE) get ## TYPE
 
-#ifndef __ARM_FEATURE_SVE
+#if !defined(__ARM_FEATURE_SVE) && !(defined(__riscv) && defined(__riscv_v))
 static DPTYPE vd2getx_vd_vd2(TYPE2(DPTYPE) v) { return v.x; }
 static DPTYPE vd2gety_vd_vd2(TYPE2(DPTYPE) v) { return v.y; }
 static SPTYPE vf2getx_vf_vf2(TYPE2(SPTYPE) v) { return v.x; }
diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt
index fe0a5d39..a0e4029d 100644
--- a/src/libm/CMakeLists.txt
+++ b/src/libm/CMakeLists.txt
@@ -60,6 +60,15 @@ elseif(SLEEF_ARCH_S390X)
     PURECFMA_SCALAR
     DSP_SCALAR
   )
+elseif(SLEEF_ARCH_RISCV64)
+  set(SLEEF_HEADER_LIST
+    RVVM1
+    RVVM1NOFMA
+    RVVM2
+    RVVM2NOFMA
+    PUREC_SCALAR
+    PURECFMA_SCALAR
+  )
 endif()
 
 # HEADER_PARAMS
@@ -98,6 +107,11 @@ command_arguments(HEADER_PARAMS_VXENOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE"
 command_arguments(HEADER_PARAMS_VXE2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2)
 command_arguments(HEADER_PARAMS_VXE2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ vxe2nofma)
 
+command_arguments(HEADER_PARAMS_RVVM1 finz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1)
+command_arguments(HEADER_PARAMS_RVVM1NOFMA cinz_ x x vfloat64m1_t vfloat32m1_t vint32mf2_t vint32m1_t __riscv_v rvvm1nofma)
+command_arguments(HEADER_PARAMS_RVVM2 finz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2)
+command_arguments(HEADER_PARAMS_RVVM2NOFMA cinz_ x x vfloat64m2_t vfloat32m2_t vint32m1_t vint32m2_t __riscv_v rvvm2nofma)
+
 command_arguments(HEADER_PARAMS_DSP_SCALAR - 1 1 double float int32_t int32_t __STDC__)
 command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec)
 command_arguments(HEADER_PARAMS_PURECFMA_SCALAR finz_ 1 1 double float int32_t int32_t __STDC__ purecfma)
@@ -144,6 +158,11 @@ command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float3
 # the "x" token of VLA SVE vector functions.
 command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)
 
+command_arguments(RENAME_PARAMS_RVVM1 finz_ x x rvvm1)
+command_arguments(RENAME_PARAMS_RVVM1NOFMA cinz_ x x rvvm1nofma)
+command_arguments(RENAME_PARAMS_RVVM2 finz_ x x rvvm2)
+command_arguments(RENAME_PARAMS_RVVM2NOFMA cinz_ x x rvvm2nofma)
+
 # ALIAS_PARAMS
 
 command_arguments(ALIAS_PARAMS_AVX512F_DP 8 __m512d __m256i e avx512f)
@@ -476,12 +495,17 @@ if(BUILD_INLINE_HEADERS)
     if(COMPILER_SUPPORTS_${SIMD})
       string(TOLOWER ${SIMD} SIMDLC)
 
+      if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET)
+        set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET})
+      endif()
+
       set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h)
 
       add_custom_command(
         OUTPUT ${INLINE_HEADER_FILE}
 
         # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved
         COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}   # gcc -E -C
+          ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}}                                     # -msse2
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch   # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                         # -I/build/src/libm/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME   # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
@@ -497,6 +521,7 @@ if(BUILD_INLINE_HEADERS)
 
         # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. Include macroonly*.h instead of helper*.h.
         COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}   # gcc -E -C
+          ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}}                                     # -msse2
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch   # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                         # -I/build/src/libm/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME   # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
diff --git a/src/libm/sleeflibm_header.h.org.in b/src/libm/sleeflibm_header.h.org.in
index d637b60e..89b3a1ca 100644
--- a/src/libm/sleeflibm_header.h.org.in
+++ b/src/libm/sleeflibm_header.h.org.in
@@ -131,6 +131,18 @@ SLEEF_IMPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx);
 
 //
 
+#if defined(__riscv_v)
+#include <riscv_vector.h>
+typedef vfloat64m2_t Sleef_vfloat64m1_t_2;
+typedef vfloat32m2_t Sleef_vfloat32m1_t_2;
+typedef vfloat64m4_t Sleef_vfloat64m2_t_2;
+typedef vfloat32m4_t Sleef_vfloat32m2_t_2;
+#define Sleef_vfloat64m1_t_2_DEFINED
+#define Sleef_vfloat32m1_t_2_DEFINED
+#define Sleef_vfloat64m2_t_2_DEFINED
+#define Sleef_vfloat32m2_t_2_DEFINED
+#endif
+
 #ifndef Sleef_double2_DEFINED
 #define Sleef_double2_DEFINED
 typedef struct {
diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c
index e531495f..6cdaa6ad 100644
--- a/src/libm/sleefsimddp.c
+++ b/src/libm/sleefsimddp.c
@@ -221,6 +221,59 @@ extern const double Sleef_rempitabdp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1nofma.h"
+#endif
+#endif /* ENABLE_RVVM1NOFMA */
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_DP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2nofma.h"
+#endif
+#endif /* ENABLE_RVVM2NOFMA */
+
 // Generic
 
 #ifdef ENABLE_VECEXT
diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c
index 9e1faa23..c5dbc2a5 100644
--- a/src/libm/sleefsimdsp.c
+++ b/src/libm/sleefsimdsp.c
@@ -321,6 +321,59 @@ extern const float Sleef_rempitabsp[];
 #endif
 #endif
 
+// RISC-V
+#ifdef ENABLE_RVVM1
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM1NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM1NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm1nofma.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2
+#define CONFIG 1
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2.h"
+#endif
+#endif
+
+#ifdef ENABLE_RVVM2NOFMA
+#define CONFIG 2
+#if !defined(SLEEF_GENHEADER)
+#define ENABLE_RVV_SP
+#include "helperrvv.h"
+#else
+#include "macroonlyRVVM2NOFMA.h"
+#endif
+#ifdef DORENAME
+#include "renamervvm2nofma.h"
+#endif
+#endif
+
 // Generic
 
 #ifdef ENABLE_VECEXT
@@ -401,6 +454,7 @@ static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) {
   return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f)));
 }
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) {
   return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
 }
@@ -413,6 +467,7 @@ static INLINE CONST VECTOR_CC vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) {
 static INLINE CONST VECTOR_CC vfloat vsign_vf_vf(vfloat f) {
   return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))));
 }
+#endif
 
 static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vf(vfloat d) {
   return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000));
@@ -487,7 +542,7 @@ static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) {
 
 EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); }
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 typedef struct {
   vfloat d;
   vint2 i;
@@ -517,9 +572,11 @@ static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) {
 }
 #endif
 
+#if !(defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 static INLINE CONST VECTOR_CC vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) {
   return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
 }
+#endif
 
 static INLINE CONST fi_t rempisubf(vfloat x) {
 #ifdef FULL_FP_ROUNDING
@@ -3290,7 +3347,7 @@ EXPORT CONST VECTOR_CC vfloat xcospif_u05(vfloat d) {
 }
 #endif // #if !defined(DETERMINISTIC)
 
-#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
+#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(ENABLE_RVVM1) || defined(ENABLE_RVVM1NOFMA) || defined(ENABLE_RVVM2) || defined(ENABLE_RVVM2NOFMA))
 typedef struct {
   vfloat2 a, b;
 } df2;
diff --git a/src/quad/CMakeLists.txt b/src/quad/CMakeLists.txt
index f37de951..5eae42da 100644
--- a/src/quad/CMakeLists.txt
+++ b/src/quad/CMakeLists.txt
@@ -234,12 +234,17 @@ if(BUILD_INLINE_HEADERS)
     if(COMPILER_SUPPORTS_${SIMD})
       string(TOLOWER ${SIMD} SIMDLC)
 
+      if(CMAKE_CROSSCOMPILING AND CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_TARGET)
+        set(FLAG_TARGET --target=${CMAKE_C_COMPILER_TARGET})
+      endif()
+
       set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefquadinline_${SIMDLC}.h)
 
       add_custom_command(
         OUTPUT ${INLINE_HEADER_FILE}
 
         # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved
         COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}   # gcc -E -C
+          ${FLAG_TARGET} ${FLAGS_ENABLE_${SIMD}}                                     # -msse2
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch   # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                         # -I/build/src/quad/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME   # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh
index 56c4c88c..f590a9db 100644
--- a/travis/before_script.aarch64-gcc.sh
+++ b/travis/before_script.aarch64-gcc.sh
@@ -8,5 +8,5 @@ ninja all
 cd /build
 mkdir build-cross
 cd build-cross
-cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE ..
+cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE ..
diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh
index 8c4bd4fa..464fa581 100644
--- a/travis/before_script.armhf-gcc.sh
+++ b/travis/before_script.armhf-gcc.sh
@@ -8,4 +8,4 @@ ninja all
 cd /build
 mkdir build-cross
 cd build-cross
-cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE ..
+cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf-gcc.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE ..
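A note on the pair convention introduced above: the sleeflibm_header.h.org.in hunk maps each `Sleef_<type>_2` result type to a register group of twice the LMUL (`Sleef_vfloat64m1_t_2` is `vfloat64m2_t`), and the tester3.c hunk unpacks those groups with `__riscv_vget_f64m1`/`__riscv_vget_f32m1`. The following standalone sketch illustrates that convention; it is not part of the patch, the file name and build line are assumptions, and it presumes a clang-17-style RVV intrinsics implementation (`__riscv_vundefined_f64m2` and the overloaded `__riscv_vset`/`__riscv_vget`):

    /* pair_layout.c -- illustrative sketch, not from the patch.
     * Assumed build: clang-17 --target=riscv64-linux-gnu -march=rv64gcv pair_layout.c */
    #include <riscv_vector.h>
    #include <stdio.h>

    int main(void) {
      double xs[4] = {1.0, 2.0, 3.0, 4.0}, ys[4] = {10.0, 20.0, 30.0, 40.0};
      size_t vl = __riscv_vsetvl_e64m1(4);          // lanes per f64m1 register

      vfloat64m1_t x = __riscv_vle64_v_f64m1(xs, vl);
      vfloat64m1_t y = __riscv_vle64_v_f64m1(ys, vl);

      // Pack the pair into one m2 register group, the same shape the patch
      // gives Sleef_vfloat64m1_t_2 (vundefined replaces the uninitialized
      // temporary that vqsetxy_vq_vm_vm in helperrvv.h relies on).
      vfloat64m2_t pair = __riscv_vundefined_f64m2();
      pair = __riscv_vset(pair, 0, x);
      pair = __riscv_vset(pair, 1, y);

      // Unpack exactly as tester3.c's vd2getx_vd_vd2/vd2gety_vd_vd2 do.
      double out[4];
      __riscv_vse64(out, __riscv_vget_f64m1(pair, 1), vl);
      printf("y[0] recovered from the pair: %g\n", out[0]);  // prints 10
      return 0;
    }

Because the pair is just a wider register group rather than a struct, a VLA (vector-length-agnostic) ABI needs no memory round-trip to return two vectors, which is presumably why the same layout is used for SVE.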
diff --git a/travis/toolchain-aarch64-gcc.cmake b/travis/toolchain-aarch64-gcc.cmake
new file mode 100644
index 00000000..c3594551
--- /dev/null
+++ b/travis/toolchain-aarch64-gcc.cmake
@@ -0,0 +1,11 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "aarch64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES aarch64-linux-gnu-gcc-11 aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5 aarch64-linux-gnu-gcc)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-aarch64.cmake b/travis/toolchain-aarch64-llvm.cmake
similarity index 71%
rename from travis/toolchain-aarch64.cmake
rename to travis/toolchain-aarch64-llvm.cmake
index c73de216..d9c11ae0 100644
--- a/travis/toolchain-aarch64.cmake
+++ b/travis/toolchain-aarch64-llvm.cmake
@@ -4,7 +4,8 @@ SET (CMAKE_SYSTEM_PROCESSOR "aarch64")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu)
 
-find_program(CMAKE_C_COMPILER aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5)
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET aarch64-linux-gnu)
 
 SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
diff --git a/travis/toolchain-armhf-gcc.cmake b/travis/toolchain-armhf-gcc.cmake
new file mode 100644
index 00000000..24e160b9
--- /dev/null
+++ b/travis/toolchain-armhf-gcc.cmake
@@ -0,0 +1,11 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "armhf")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf)
+
+find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabihf-gcc-11 arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5 arm-linux-gnueabihf-gcc)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-armhf.cmake b/travis/toolchain-armhf-llvm.cmake
similarity index 69%
rename from travis/toolchain-armhf.cmake
rename to travis/toolchain-armhf-llvm.cmake
index ba233487..6c157289 100644
--- a/travis/toolchain-armhf.cmake
+++ b/travis/toolchain-armhf-llvm.cmake
@@ -4,7 +4,8 @@ SET (CMAKE_SYSTEM_PROCESSOR "armhf")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf)
 
-find_program(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5)
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf)
 
 SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
diff --git a/travis/toolchain-native-gcc.cmake b/travis/toolchain-native-gcc.cmake
new file mode 100644
index 00000000..07ea294d
--- /dev/null
+++ b/travis/toolchain-native-gcc.cmake
@@ -0,0 +1 @@
+find_program(CMAKE_C_COMPILER gcc)
diff --git a/travis/toolchain-native-llvm.cmake b/travis/toolchain-native-llvm.cmake
new file mode 100644
index 00000000..6f8e7121
--- /dev/null
+++ b/travis/toolchain-native-llvm.cmake
@@ -0,0 +1 @@
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
diff --git a/travis/toolchain-ppc64el.cmake b/travis/toolchain-ppc64el-gcc.cmake
similarity index 80%
rename from travis/toolchain-ppc64el.cmake
rename to travis/toolchain-ppc64el-gcc.cmake
index e26a6eaa..7d6c96ae 100644
--- a/travis/toolchain-ppc64el.cmake
+++ b/travis/toolchain-ppc64el-gcc.cmake
@@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "ppc64")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu)
 
-find_program(CMAKE_C_COMPILER powerpc64le-linux-gnu-gcc ppc64el-cc)
+find_program(CMAKE_C_COMPILER NAMES powerpc64le-linux-gnu-gcc-11 powerpc64le-linux-gnu-gcc ppc64el-cc)
 
 SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar)
 
diff --git a/travis/toolchain-ppc64el-llvm.cmake b/travis/toolchain-ppc64el-llvm.cmake
new file mode 100644
index 00000000..531b36f3
--- /dev/null
+++ b/travis/toolchain-ppc64el-llvm.cmake
@@ -0,0 +1,14 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "ppc64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu)
+
+SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-riscv64-gcc.cmake b/travis/toolchain-riscv64-gcc.cmake
new file mode 100644
index 00000000..b0840998
--- /dev/null
+++ b/travis/toolchain-riscv64-gcc.cmake
@@ -0,0 +1,11 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "riscv64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES riscv64-linux-gnu-gcc-14 riscv64-linux-gnu-gcc)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-riscv64-llvm.cmake b/travis/toolchain-riscv64-llvm.cmake
new file mode 100644
index 00000000..1821770a
--- /dev/null
+++ b/travis/toolchain-riscv64-llvm.cmake
@@ -0,0 +1,12 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "riscv64")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/riscv64-linux-gnu /usr/include/riscv64-linux-gnu /usr/lib/riscv64-linux-gnu /lib/riscv64-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang)
+set(CMAKE_C_COMPILER_TARGET riscv64-linux-gnu)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/travis/toolchain-s390x.cmake b/travis/toolchain-s390x-gcc.cmake
similarity index 81%
rename from travis/toolchain-s390x.cmake
rename to travis/toolchain-s390x-gcc.cmake
index a2d37bda..4aa9f12c 100644
--- a/travis/toolchain-s390x.cmake
+++ b/travis/toolchain-s390x-gcc.cmake
@@ -4,7 +4,7 @@ SET (CMAKE_SYSTEM_PROCESSOR "s390x")
 
 SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu)
 
-find_program(CMAKE_C_COMPILER s390x-linux-gnu-gcc)
+find_program(CMAKE_C_COMPILER NAMES s390x-linux-gnu-gcc-11 s390x-linux-gnu-gcc)
 
 SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
diff --git a/travis/toolchain-s390x-llvm.cmake b/travis/toolchain-s390x-llvm.cmake
new file mode 100644
index 00000000..ca5e9687
--- /dev/null
+++ b/travis/toolchain-s390x-llvm.cmake
@@ -0,0 +1,12 @@
+SET (CMAKE_CROSSCOMPILING TRUE)
+SET (CMAKE_SYSTEM_NAME "Linux")
+SET (CMAKE_SYSTEM_PROCESSOR "s390x")
+
+SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu)
+
+find_program(CMAKE_C_COMPILER NAMES clang-17 clang-16 clang-15 clang-14 clang-13 clang)
+set(CMAKE_C_COMPILER_TARGET s390x-linux-gnu)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
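Stepping back from the toolchain plumbing, the subtlest part of this patch is the mask emulation in helperrvv.h: SLEEF's bitmask (`vmask`) and predicate (`vopmask`) helpers are built from `__riscv_vmerge`, because RVV keeps predicates in dedicated mask registers rather than as all-ones/all-zeros vector lanes. A scalar model of a few of those helpers, illustration only (plain portable C, not code from the patch; one `uint64_t` stands for one `vmask` lane, one `bool` for one `vopmask` lane):

    /* mask_model.c -- scalar model of a few helperrvv.h helpers. */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    // vcast_vm_i_i: pack two 32-bit halves into one 64-bit mask element.
    static uint64_t cast_vm_i_i(int64_t h, int64_t l) {
      return (((uint64_t)h) << 32) | (uint32_t)l;
    }

    // vand_vm_vo64_vm: __riscv_vmerge(y, 0, vmnot(x)) keeps y only where the
    // predicate is set -- AND against an all-ones/all-zeros lane mask.
    static uint64_t and_vo64_vm(bool x, uint64_t y) { return x ? y : 0; }

    // vandnot_vm_vo64_vm: __riscv_vmerge(y, 0, x) zeroes the selected lanes,
    // i.e. (~mask) & y per lane.
    static uint64_t andnot_vo64_vm(bool x, uint64_t y) { return x ? 0 : y; }

    // vor_vm_vo64_vm: __riscv_vmerge(y, -1, x) forces all bits on where set.
    static uint64_t or_vo64_vm(bool x, uint64_t y) { return x ? ~UINT64_C(0) : y; }

    int main(void) {
      assert(cast_vm_i_i(-1, 0) == 0xffffffff00000000u);
      assert(and_vo64_vm(true, 0x1234) == 0x1234 && and_vo64_vm(false, 0x1234) == 0);
      assert(andnot_vo64_vm(true, 0x1234) == 0 && andnot_vo64_vm(false, 0x1234) == 0x1234);
      assert(or_vo64_vm(true, 0) == ~UINT64_C(0) && or_vo64_vm(false, 7) == 7);
      return 0;
    }

Per lane, a set predicate behaves exactly like an all-ones 64-bit mask, which is why merging against 0 or -1 reproduces the AND/ANDNOT/OR semantics the rest of SLEEF expects; `vtestallones_i_vo64` follows the same idea by comparing `__riscv_vcpop` against the lane count.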