Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use hardware-accelerated __fp16 types with Clang #27

Merged
merged 1 commit into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ jobs:
run: cmake --build build --config Release --parallel -- -quiet
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
# CI job: build and test with the F16C instruction set enabled, so that the
# __F16C__-gated detection in include/fp16/macros.h is exercised.
# NOTE(review): presumably the macos-12 runner image is an Intel (x86_64) host — confirm.
cmake-macos-x86_64-f16c:
runs-on: macos-12
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Configure
run: cmake -Bbuild -S. -G Xcode -DCMAKE_OSX_ARCHITECTURES=x86_64 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
env:
# -mf16c is passed to both the C and C++ compilers via the environment at configure time.
# NOTE(review): this is expected to make the compiler predefine __F16C__ — confirm.
CFLAGS: "-mf16c"
CXXFLAGS: "-mf16c"
- name: Build
run: cmake --build build --config Release --parallel -- -quiet
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
cmake-macos-arm64:
runs-on: macos-14
timeout-minutes: 15
Expand Down
17 changes: 17 additions & 0 deletions include/fp16/fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#endif

#include <fp16/bitcasts.h>
#include <fp16/macros.h>


/*
Expand Down Expand Up @@ -106,6 +107,13 @@ static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline float fp16_ieee_to_fp32_value(uint16_t h) {
#if FP16_USE_FP16_TYPE
union {
uint16_t as_bits;
__fp16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#else
/*
* Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
* +---+-----+------------+-------------------+
Expand Down Expand Up @@ -211,6 +219,7 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) {
const uint32_t result = sign |
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
return fp32_from_bits(result);
#endif
}

/*
Expand All @@ -221,6 +230,13 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline uint16_t fp16_ieee_from_fp32_value(float f) {
#if FP16_USE_FP16_TYPE
union {
__fp16 as_value;
uint16_t as_bits;
} fp16 = { (__fp16) f };
return fp16.as_bits;
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float scale_to_inf = 0x1.0p+112f;
const float scale_to_zero = 0x1.0p-110f;
Expand Down Expand Up @@ -249,6 +265,7 @@ static inline uint16_t fp16_ieee_from_fp32_value(float f) {
const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
const uint32_t nonsign = exp_bits + mantissa_bits;
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
#endif
}

/*
Expand Down
17 changes: 17 additions & 0 deletions include/fp16/macros.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once
#ifndef FP16_MACROS_H
#define FP16_MACROS_H


/*
 * FP16_USE_FP16_TYPE controls how fp16.h implements fp16_ieee_to_fp32_value and
 * fp16_ieee_from_fp32_value:
 *   1 - convert through the compiler-provided __fp16 type;
 *   0 - use the portable implementation based on floating-point operations and bitcasts.
 *
 * The macro may be defined by the user before this header is included to force either path.
 * When it is not predefined, it is set to 1 only for Clang targeting AArch64 or x86 with F16C
 * enabled (__F16C__), and to 0 everywhere else.
 *
 * Only block comments are used here: the header is meant to stay usable in pre-C99 modes,
 * where line comments after a preprocessor directive are diagnosed as extra tokens.
 */
#ifndef FP16_USE_FP16_TYPE
#if defined(__clang__) && (defined(__F16C__) || defined(__aarch64__))
#define FP16_USE_FP16_TYPE 1
#else
#define FP16_USE_FP16_TYPE 0
#endif
#endif /* !defined(FP16_USE_FP16_TYPE) */

#endif /* FP16_MACROS_H */