Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IBM System/390 support #291

Merged
merged 38 commits into from
Aug 26, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ matrix:
env:
- LABEL=common
- OMP_WAIT_POLICY=passive CTEST_OUTPUT_ON_FAILURE=TRUE
# Linux (bionic) job on the s390x architecture: installs g++-8 plus the MPFR,
# OpenSSL and FFTW3 development packages and sets LABEL to "s390x-gcc".
- os: linux
dist: bionic
arch: s390x
addons:
apt:
packages:
- g++-8 libmpfr-dev libssl-dev libfftw3-dev
env:
- LABEL="s390x-gcc"

before_install:
- export PATH=$PATH:/usr/bin
Expand Down
89 changes: 81 additions & 8 deletions Configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ set(SLEEF_SUPPORTED_EXTENSIONS
ADVSIMD ADVSIMDNOFMA SVE SVENOFMA # Aarch64
NEON32 NEON32VFPV4 # Aarch32
VSX VSXNOFMA # PPC64
ZVECTOR ZVECTORNOFMA ZVECTOR2 ZVECTOR2NOFMA # IBM Z
PUREC_SCALAR PURECFMA_SCALAR # Generic type
CACHE STRING "List of SIMD architectures supported by libsleef."
)
Expand Down Expand Up @@ -197,6 +198,31 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
set(TESTER3_DEFINITIONS_VSX ATR=finz_ DPTYPE=__vector_double SPTYPE=__vector_float DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vsx)
set(TESTER3_DEFINITIONS_VSXNOFMA ATR=cinz_ DPTYPE=__vector_double SPTYPE=__vector_float DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vsxnofma)

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
# IBM Z branch: taken when the processor name contains "s390x".
set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.")

# Header variants listed for this architecture: the four ZVECTOR extensions,
# the ZVECTOR_ entry, and the two scalar fallbacks.
set(SLEEF_HEADER_LIST
ZVECTOR_
ZVECTOR
ZVECTORNOFMA
ZVECTOR2
ZVECTOR2NOFMA
PUREC_SCALAR
PURECFMA_SCALAR
)

# Per-variant header parameters. FMA variants use the finz_ prefix and NOFMA
# variants cinz_; ZVECTOR_ is the only entry without a trailing extension name.
# NOTE(review): other HEADER_PARAMS_* entries visible in this file are built
# with command_arguments() rather than set() - confirm set() is intended here.
set(HEADER_PARAMS_ZVECTOR_ finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__)
set(HEADER_PARAMS_ZVECTOR finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ zvector)
set(HEADER_PARAMS_ZVECTORNOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ zvectornofma)
set(HEADER_PARAMS_ZVECTOR2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ zvector2)
set(HEADER_PARAMS_ZVECTOR2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ zvector2nofma)

# On this architecture the PURECFMA_SCALAR variant is given -march=z13.
set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z13")

# TESTER3 definitions, one set per ZVECTOR variant; ATR and EXTSPEC mirror the
# prefix and extension name used in the header parameters above.
set(TESTER3_DEFINITIONS_ZVECTOR ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=zvector)
set(TESTER3_DEFINITIONS_ZVECTORNOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=zvectornofma)
set(TESTER3_DEFINITIONS_ZVECTOR2 ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=zvector2)
set(TESTER3_DEFINITIONS_ZVECTOR2NOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=zvector2nofma)
endif()

command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec)
Expand Down Expand Up @@ -226,6 +252,10 @@ command_arguments(RENAME_PARAMS_NEON32 cinz_ 2 4 neon)
command_arguments(RENAME_PARAMS_NEON32VFPV4 finz_ 2 4 neonvfpv4)
command_arguments(RENAME_PARAMS_VSX finz_ 2 4 vsx)
command_arguments(RENAME_PARAMS_VSXNOFMA cinz_ 2 4 vsxnofma)
# IBM Z rename parameters: FMA variants use the finz_ prefix, NOFMA variants
# cinz_; the last argument is the lower-case extension name.
command_arguments(RENAME_PARAMS_ZVECTOR finz_ 2 4 zvector)
command_arguments(RENAME_PARAMS_ZVECTORNOFMA cinz_ 2 4 zvectornofma)
command_arguments(RENAME_PARAMS_ZVECTOR2 finz_ 2 4 zvector2)
command_arguments(RENAME_PARAMS_ZVECTOR2NOFMA cinz_ 2 4 zvector2nofma)
command_arguments(RENAME_PARAMS_PUREC_SCALAR cinz_ 1 1 purec)
command_arguments(RENAME_PARAMS_PURECFMA_SCALAR finz_ 1 1 purecfma)
# The vector length parameters in SVE, for SP and DP, are chosen for
Expand Down Expand Up @@ -282,6 +312,11 @@ set(CLANG_FLAGS_ENABLE_SVENOFMA "-march=armv8-a+sve")
# PPC64
set(CLANG_FLAGS_ENABLE_VSX "-mcpu=power8")
set(CLANG_FLAGS_ENABLE_VSXNOFMA "-mcpu=power8")
# IBM Z: the ZVECTOR variants build with -march=z13 and the ZVECTOR2 variants
# with -march=z14; all four add -mzvector. The FMA and NOFMA variants of each
# pair share the same flags.
set(CLANG_FLAGS_ENABLE_ZVECTOR "-march=z13;-mzvector")
set(CLANG_FLAGS_ENABLE_ZVECTORNOFMA "-march=z13;-mzvector")
set(CLANG_FLAGS_ENABLE_ZVECTOR2 "-march=z14;-mzvector")
set(CLANG_FLAGS_ENABLE_ZVECTOR2NOFMA "-march=z14;-mzvector")

# All variables storing compiler flags should be prefixed with FLAGS_
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
Expand Down Expand Up @@ -407,7 +442,7 @@ option(DISABLE_SSE2 "Disable SSE2" OFF)
option(ENFORCE_SSE2 "Build fails if SSE2 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_SSE2)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SSE2})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SSE2}")
CHECK_C_SOURCE_COMPILES("
#if defined(_MSC_VER)
#include <intrin.h>
Expand All @@ -429,7 +464,7 @@ option(DISABLE_SSE4 "Disable SSE4" OFF)
option(ENFORCE_SSE4 "Build fails if SSE4 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_SSE4)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SSE4})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SSE4}")
CHECK_C_SOURCE_COMPILES("
#if defined(_MSC_VER)
#include <intrin.h>
Expand All @@ -451,7 +486,7 @@ option(ENFORCE_AVX "Disable AVX" OFF)
option(ENFORCE_AVX "Build fails if AVX is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_AVX)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX}")
CHECK_C_SOURCE_COMPILES("
#if defined(_MSC_VER)
#include <intrin.h>
Expand All @@ -473,7 +508,7 @@ option(DISABLE_FMA4 "Disable FMA4" OFF)
option(ENFORCE_FMA4 "Build fails if FMA4 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_FMA4)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_FMA4})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_FMA4}")
CHECK_C_SOURCE_COMPILES("
#if defined(_MSC_VER)
#include <intrin.h>
Expand All @@ -495,7 +530,7 @@ option(DISABLE_AVX2 "Disable AVX2" OFF)
option(ENFORCE_AVX2 "Build fails if AVX2 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_AVX2)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX2})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX2}")
CHECK_C_SOURCE_COMPILES("
#if defined(_MSC_VER)
#include <intrin.h>
Expand All @@ -522,7 +557,7 @@ option(DISABLE_AVX512F "Disable AVX512F" OFF)
option(ENFORCE_AVX512F "Build fails if AVX512F is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_AVX512F)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX512F})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX512F}")
CHECK_C_SOURCE_COMPILES("
#if defined(_MSC_VER)
#include <intrin.h>
Expand Down Expand Up @@ -554,7 +589,7 @@ option(DISABLE_SVE "Disable SVE" OFF)
option(ENFORCE_SVE "Build fails if SVE is not supported by the compiler" OFF)

if(SLEEF_ARCH_AARCH64 AND NOT DISABLE_SVE)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SVE})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SVE}")
CHECK_C_SOURCE_COMPILES("
#include <arm_sve.h>
int main() {
Expand All @@ -576,7 +611,7 @@ option(DISABLE_VSX "Disable VSX" OFF)
option(ENFORCE_VSX "Build fails if VSX is not supported by the compiler" OFF)

if(SLEEF_ARCH_PPC64 AND NOT DISABLE_VSX)
set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_VSX})
# Flatten the flag list into a space-separated string. The expansion is quoted:
# unquoted, a multi-flag list is split into several inputs that string(REPLACE)
# concatenates with no separator, and an empty value is a command error.
string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_VSX}")
CHECK_C_SOURCE_COMPILES("
#include <altivec.h>
#ifndef __LITTLE_ENDIAN__
Expand All @@ -600,6 +635,44 @@ if (ENFORCE_VSX AND NOT COMPILER_SUPPORTS_VSX)
message(FATAL_ERROR "ENFORCE_VSX is specified and that feature is disabled or not supported by the compiler")
endif()

# IBM Z

# ZVECTOR support probe.
#   DISABLE_ZVECTOR - skip the compile probe.
#   ENFORCE_ZVECTOR - abort configuration unless COMPILER_SUPPORTS_ZVECTOR is true.
option(DISABLE_ZVECTOR "Disable ZVECTOR" OFF)
option(ENFORCE_ZVECTOR "Build fails if ZVECTOR is not supported by the compiler" OFF)

if(NOT DISABLE_ZVECTOR AND SLEEF_ARCH_S390X)
  # The probe reads its extra compiler flags from CMAKE_REQUIRED_FLAGS as one
  # space-separated string, so the ';'-separated flag list is flattened first.
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_ZVECTOR}")
  check_c_source_compiles("
#include <vecintrin.h>
int main() {
__vector double d;
d = __builtin_s390_vfidb(d, 4, 4);
}"
    COMPILER_SUPPORTS_ZVECTOR)
endif()

if(ENFORCE_ZVECTOR AND NOT COMPILER_SUPPORTS_ZVECTOR)
  message(FATAL_ERROR "ENFORCE_ZVECTOR is specified and that feature is disabled or not supported by the compiler")
endif()

# ZVECTOR2 support probe.
#   DISABLE_ZVECTOR2 - skip the compile probe.
#   ENFORCE_ZVECTOR2 - abort configuration unless COMPILER_SUPPORTS_ZVECTOR2 is true.
option(DISABLE_ZVECTOR2 "Disable ZVECTOR2" OFF)
option(ENFORCE_ZVECTOR2 "Build fails if ZVECTOR2 is not supported by the compiler" OFF)

if(NOT DISABLE_ZVECTOR2 AND SLEEF_ARCH_S390X)
  # The probe reads its extra compiler flags from CMAKE_REQUIRED_FLAGS as one
  # space-separated string, so the ';'-separated flag list is flattened first.
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_ZVECTOR2}")
  check_c_source_compiles("
#include <vecintrin.h>
int main() {
__vector float d;
d = vec_sqrt(d);
}"
    COMPILER_SUPPORTS_ZVECTOR2)
endif()

if(ENFORCE_ZVECTOR2 AND NOT COMPILER_SUPPORTS_ZVECTOR2)
  message(FATAL_ERROR "ENFORCE_ZVECTOR2 is specified and that feature is disabled or not supported by the compiler")
endif()

# OpenMP

option(DISABLE_OPENMP "Disable OPENMP" OFF)
Expand Down
11 changes: 8 additions & 3 deletions src/arch/helperpurec_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#define ENABLE_FMA_DP
#define ENABLE_FMA_SP

#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__)
#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__)
#ifndef FP_FAST_FMA
#define FP_FAST_FMA
#endif
Expand Down Expand Up @@ -290,16 +290,21 @@ static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_
static INLINE vfloat vcast_vf_f(float f) { return f; }
static INLINE vmask vreinterpret_vm_vf(vfloat vf) { union { vfloat vf; vmask vm; } cnv; cnv.vf = vf; return cnv.vm; }
static INLINE vfloat vreinterpret_vf_vm(vmask vm) { union { vfloat vf; vmask vm; } cnv; cnv.vm = vm; return cnv.vf; }
// Bit-level conversion between vfloat and vint2 through a union.
// On big-endian targets the float is overlaid on the second slot of a
// two-float array; on all other targets it is overlaid directly.
// When converting to vint2 the integer is zeroed before the float is stored,
// so any bits the float does not cover read back as zero.
// NOTE(review): presumably vint2 is twice as wide as vfloat here - confirm.
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) {
  union { vfloat vf[2]; vint2 vi2; } u;
  u.vi2 = vi;
  return u.vf[1];
}
static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) {
  union { vfloat vf[2]; vint2 vi2; } u;
  u.vi2 = 0;
  u.vf[1] = vf;
  return u.vi2;
}
#else
static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) {
  union { vfloat vf; vint2 vi2; } u;
  u.vi2 = vi;
  return u.vf;
}
static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) {
  union { vfloat vf; vint2 vi2; } u;
  u.vi2 = 0;
  u.vf = vf;
  return u.vi2;
}
#endif

static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return x + y; }
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return x - y; }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return x * y; }
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return x / y; }
static INLINE vfloat vrec_vf_vf (vfloat x) { return 1 / x; }

static INLINE vfloat vabs_vf_vf(vfloat x) { versatileVector v = { .f = x }; v.x &= 0x7fffffff; return v.f; }
static INLINE vfloat vabs_vf_vf(vfloat x) { versatileVector v = { .f = x }; v.i[0] &= 0x7fffffff; return v.f; }
static INLINE vfloat vneg_vf_vf(vfloat x) { return -x; }

static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return x > y ? x : y; }
Expand Down Expand Up @@ -329,7 +334,7 @@ static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return x >= y ? ~(uint

static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { versatileVector v = { .i2 = x }, w = { .i2 = y }; v.i[0] += w.i[0]; v.i[1] += w.i[1]; return v.i2; }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { versatileVector v = { .i2 = x }, w = { .i2 = y }; v.i[0] -= w.i[0]; v.i[1] -= w.i[1]; return v.i2; }
static INLINE vint2 vneg_vi2_vi2(vint2 x) { versatileVector v = { .i2 = x }; v.i[0] = -v.i[0]; return v.i2; }
static INLINE vint2 vneg_vi2_vi2(vint2 x) { versatileVector v = { .i2 = x }; v.i[0] = -v.i[0]; v.i[1] = -v.i[1]; return v.i2; }

static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return x & y; }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return y & ~x; }
Expand Down
Loading