Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LIBM] Introduce faster method for evaluating polynomials #239

Merged
merged 12 commits into from
Feb 12, 2019
Merged
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@ http://www.boost.org/LICENSE_1_0.txt.
Contributions to this project are accepted under the same license.


Copyright Naoki Shibata and contributors 2010 - 2017.
Copyright Naoki Shibata and contributors 2010 - 2019.

Main Page : http://sleef.org/
GitHub Repo : https://github.com/shibatch/sleef
60 changes: 30 additions & 30 deletions src/libm-tester/hash_cinz.txt
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
sin u35 ec03e7577871ef943c4b1ca6a485d08c
sin u10 0f8444e11d3779dc4c28f3db353607bf
cos u35 b87d0826f08562a17c71125d39fb5919
cos u10 9f9e4f5f3d2d84deeab3829375054fe8
tan u35 b04ff8785a7c98694842004ccb3da186
tan u10 9a4e53fd7e9245c7c1c8cb9cd30b1644
sin u35 bc50dfbcbd8ef534541d1babe90860c7
sin u10 9de2d9fcee4d16f87b463b813226b4cf
cos u35 506e34a809b80ad3603ed46ba2a574b0
cos u10 7365446329254723b64987da76aea812
tan u35 cfb4aacb5f14e68c3171246838ba8f57
tan u10 5fd08e0552e3ab853439bf5fd2bd344d
sincos u10 7c164edcaa45988f6165b653fc76c495
sincos u35 38fe7e261e184ed8dbf432ce6bedc5c4
sincospi u05 0c6fc00c7aaf0b6e67d66542d1be833d
sincospi u35 c428b0fc3e6c5be4d2c03dcd8bb27a7c
log u10 4855b27222d900bea47a27cadba71727
log u35 9c60b59cc6bba46d016798b97ea939c2
log10 u10 ee979872a5a5181905eb7d04eefb3870
log u35 c95484de57c167da3d8d6d1baadf9ffa
log10 u10 36645e8031d873d66fd0ec2c5959f273
log1p u10 1383924fb56cf2e7eda27de21320c591
exp u10 1a8756694ca98254c4d872646b639721
exp2 u10 6aeddf0a30622490d3fabbe4851e3e7a
exp u10 23e2f486c7e9d2068f3f6d30a3161ffd
exp2 u10 436146f8d6dcaa4a754837108a9aa3e1
exp10 u10 9d704b310f683872a6446cfc97726a4d
expm1 u10 374817f0728c57b0ec019c3bfb141a79
pow u10 15d42c41ce130c3a61fff3686632c8ce
expm1 u10 cd3f0b8e86943d52c278394b60e2d22e
pow u10 a0ea63b27d33262346a35c9439741075
cbrt u10 5d8bf28ac74624594fd1be9217817690
cbrt u10 3c896e03746bcf1b3f70182dfec3d93b
cbrt u35 73daa306764e208aab1627ac110b10d7
cbrt u35 c29b7bf200215425b4ba948c8cc94c42
hypot u05 cc2f18e409e19a02cadf7b91fd869120
hypot u35 5194e0a554174a6145511ce3df9c1f46
asin u10 bae9eb3b4f484295f3f8641e61808dc6
asin u35 9bea0a03680e83cbd198262ecdb37e28
acos u10 1c435f5e072ad2fe458ed3cb7d61efdb
acos u35 c7f12cdf2aa9b68b0476053d1727b42f
atan u10 1f13ac8525edc54818f136bd9cd8ac46
atan u10 2b2309e489a92e208e83d1641301e59c
atan u35 c62cfeb4fa325a5b85613723723b1d98
atan u35 515a88bf70e307daee25f24178e3791a
atan2 u10 b3cbe8bf7c3a75c4eb1a767e2331b5ca
atan2 u35 07060ed63483b89fb3c27840d200d747
sinh u10 25b8333e58c6a39e5b0f8ba085b218ef
cosh u10 beb5e63186c0d580e11333e206aa31b1
asin u10 86c061caec3fa2e1bc71bda4dad29f4c
asin u35 31303b88bdc00206265002d6cc5e89e4
acos u10 0a1a403590f2ac8364f132b334920945
acos u35 493f960c1cce57931d95a5a22a0587a3
atan u10 e5c01c5ec2e9a535ac8d3000c346067d
atan u10 584ebe4bf2df7b60210f87f74b7d21d1
atan u35 9d6d83e066b5a4851d44771418c9948c
atan u35 f32c1aa4caa08c6945afd1125ba8b113
atan2 u10 dfa671b1bae503fcae52246f65da3324
atan2 u35 afb07894347062a96dab705b34eb1763
sinh u10 61d459b1f368087f6f23ebf8e9f0ea01
cosh u10 f77eb95f79e274c12b4e92dc0389259b
tanh u10 2bb9dd54ed0fa22bb5f3b6d557eb58a3
asinh u10 01136e54e2a434839530dda54f33cfdb
acosh u10 2f3c28c9ee2eb2b3d5659c6cb2a58e3e
atanh u10 601a77ba8c1d5175f2808b48a41260c1
lgamma u10 90cdc41063f4198c6ad592c0cdd0f5da
tgamma u10 74ac3717702a9586553f83cd9abfce6e
tgamma u10 6f864c3a1f17fbdf914cac7ffcd82cb7
erf u10 4031f3e285101359aea99feb5e2de3f0
erfc u15 9b6e3d08a9d409154bcbf0ab8315f0bd
erfc u15 5e116a4316dafa742769f71e18f6f9fe
fabs bef2f2ac8a4789357e580b4da4f9b9fe
copysign 3219022f267464e3704f90558e8df3bc
fmax 4e4f5220ccfef191864c316df0d18fc0
@@ -60,7 +60,7 @@ sinf u35 833d845950b9cbb025629fe4c040f8f6
sinf u10 9c21afa4d7d6af3fc666309c3cd647fe
cosf u35 74d7f871a6553cd0019087895e2052ad
cosf u10 35349e94c323c1614f22093959288010
tanf u35 5412973e369bb09dc7d506823b6fb4e9
tanf u35 bbb7c092d017e96d2454a38a20687735
tanf u10 227423bc04f42d76a8f68082ba696126
sincosf u10 83ecc4e3d5295056e9d8c52bc196b666
sincosf u35 533319caa49a961e4909bd6dcab40721
@@ -87,10 +87,10 @@ acosf u10 15617dd0429b90e59d2923415934c2a6
acosf u35 af0b132d9e263721f9296187dbf9b9bf
atanf u10 26b77fb423104b45633cf24500237d6e
atanf u10 4313d0bc2708de53f74d804aac6564d4
atanf u35 12c1b3ad574c3cbf642690ab13bf27d1
atanf u35 a112a95870d110dddf8a4aa7e877c556
atanf u35 97a1797897955643c722c7d291987331
atanf u35 7d3f47169415058e8578f11d899bfd10
atan2f u10 098a33f730fe95ce4774a991db4cee14
atan2f u35 25a1ce0f275b31ab2d77a5acc9d25ece
atan2f u35 56fc6bd8349979f0d0b1dcdb57f68363
sinhf u10 0780a2f57df3a831718195d1ee5c19ef
coshf u10 cfbb6aed408e43a7b7f053474100ff2d
tanhf u10 d19f254d41e8726c748df87b95bc9acd
60 changes: 30 additions & 30 deletions src/libm-tester/hash_finz.txt
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
sin u35 2e8fd38a29df4880a9aa9e4ddca62a52
sin u10 b0419d95daaef51278bf8fd5c347e211
cos u35 ff31d0d21e577876dd019d0ff215e2d1
cos u10 165b5501b94897c832aee0cca2227aba
tan u35 1e31c80ded9720a3b3fcf68eb2e7a5f1
tan u10 bd0bec1cd3107d66ce1de78af2929a2d
sin u35 c163e4a7e9ccebb2181dcc8653367d8c
sin u10 a2bda8db1c7d23a9b2a805bf8e0ec95e
cos u35 52f902bd939d751b5b544ac70181fcff
cos u10 c07bdf6738ba6aabed3cb2d25610ad14
tan u35 4651b9aa79fab36a07e3d4d6ef250d2f
tan u10 c98f29a62067fa63646d9bcc29a310c6
sincos u10 3fe37f4eb805505152f2b14a22a9f94e
sincos u35 95a7b7f48c71febf10ec6eff796dd391
sincospi u05 0c6fc00c7aaf0b6e67d66542d1be833d
sincospi u35 c428b0fc3e6c5be4d2c03dcd8bb27a7c
log u10 4855b27222d900bea47a27cadba71727
log u35 a0ba62c41a2c42e04bef9be158c573d1
log10 u10 ee979872a5a5181905eb7d04eefb3870
log u35 015f8ae899c9b921d48919dd12ef19a9
log10 u10 36645e8031d873d66fd0ec2c5959f273
log1p u10 1383924fb56cf2e7eda27de21320c591
exp u10 68339ce61fd14ae329e4c5f37a7f4c3c
exp2 u10 bccd14fa3878c93c7bdc8feca954344b
exp u10 084e5be89c2ad03e356078ea4f287bab
exp2 u10 6e36db9ae2cf9eca82e3d9157c622351
exp10 u10 0cc08bc6a3d08d6e61450b5370c6161e
expm1 u10 374817f0728c57b0ec019c3bfb141a79
pow u10 a2ae975d6aa593c5f47d38cd7514ddb3
expm1 u10 cd3f0b8e86943d52c278394b60e2d22e
pow u10 7e19796027d7c1d1999be948f90e6181
cbrt u10 5d8bf28ac74624594fd1be9217817690
cbrt u10 3c896e03746bcf1b3f70182dfec3d93b
cbrt u35 fc7ee3e3e6c54365d708b752c242a947
cbrt u35 2408714a56d74f8c82389ca6772cdbc1
hypot u05 cc2f18e409e19a02cadf7b91fd869120
hypot u35 be7bbd41dffd746b70261ee773cbd4b2
asin u10 7949c3ac5b9ca0a548a411bffe321632
asin u35 8ae753ef1085067fe59d5b3d9037f185
acos u10 1db2f6cdcebd9c46650c2e3c16ff6aba
acos u35 ed73f226b2bd5eed79cbf54404825e28
atan u10 5284563c51512b9785c89ec0620f518e
atan u10 9cf30e08e5d980ae3bcbcc4be77ed2f6
atan u35 585d2ff936bed7863b566003553c26fd
atan u35 c20c4d491e4b4265f7ccf4a0819cc82a
atan2 u10 45678ae1ca8075b8a7f40fb785c10b97
atan2 u35 936110373f2e1c44ebb479fdcc2d549f
sinh u10 775dc1a219c3703cd86b1332e66029e4
cosh u10 beb5e63186c0d580e11333e206aa31b1
asin u10 8a21b7c28cdaffc9d3e53f415367932e
asin u35 9c9e8107782898e9faed6924ad1b3cb1
acos u10 28261e4eb8331865660c814676d5c6bc
acos u35 310911130bfc45b10dabe3a072939331
atan u10 bfaf9c60689afb923c5c9637b3a711ca
atan u10 4e6b0112b6bb5a5fe936b9a01b8a7afc
atan u35 6161b6189609f105b017d8768d0a41f1
atan u35 6face71d8d93c69448d49ed6140e361d
atan2 u10 7ca84089be978aca61b04c5be6aaf353
atan2 u35 6a3e764125aab2a0a13e7a0d9ec02f7f
sinh u10 61d459b1f368087f6f23ebf8e9f0ea01
cosh u10 f77eb95f79e274c12b4e92dc0389259b
tanh u10 2bb9dd54ed0fa22bb5f3b6d557eb58a3
asinh u10 01136e54e2a434839530dda54f33cfdb
acosh u10 2f3c28c9ee2eb2b3d5659c6cb2a58e3e
atanh u10 601a77ba8c1d5175f2808b48a41260c1
lgamma u10 90cdc41063f4198c6ad592c0cdd0f5da
tgamma u10 cc1e96362ad9626d9c0c397426abfbac
tgamma u10 cb9a93844ad1713d2ab92ff5b6398150
erf u10 3f3c9bf4f8e5768c09c472cee4475e43
erfc u15 3465bc3addcaaa18368e654cb92320ef
erfc u15 3e247a54183eeddedc33e99c50118995
fabs bef2f2ac8a4789357e580b4da4f9b9fe
copysign 3219022f267464e3704f90558e8df3bc
fmax 4e4f5220ccfef191864c316df0d18fc0
@@ -60,7 +60,7 @@ sinf u35 f8f804eae1d9443103e81fec96293477
sinf u10 3f12a7381f1cbb1830d92b4ec72d21fe
cosf u35 f2f3d1c9f090cde9c02439608dc7066e
cosf u10 dc35f27fae65f63f0aa6ad241f8b387b
tanf u35 a1109709284ec790b31ef533c63212d6
tanf u35 68d42ad1fb412e6b8be3853461e61213
tanf u10 97df301d4f59e67d5318b5356b703f06
sincosf u10 a97124d810ec461c135dc4fb0c059b6f
sincosf u35 0cc521e52ae1227d311012c2919c1ff2
@@ -87,10 +87,10 @@ acosf u10 5180fde4b02a0ca4cd75f0a786a1bfeb
acosf u35 72b0e2f9791f90f1c43570b9e9ba893f
atanf u10 fa672e387a204055f735b7af98dd8a35
atanf u10 d017670c13bc221b68bc9ee5f41c4b5e
atanf u35 052c537c09b297322e825ac0b2f0339e
atanf u35 9b25b4a6d96ee5fed9ca58cc7f21cb71
atanf u35 f592e46eaa5d29583f86d3e336f20b6b
atanf u35 e7087fe40de46921826b373d10c40954
atan2f u10 275b2fa8ee554c45551bb142db9f8197
atan2f u35 1e3ae1d22d0ff3054b29f53760a1cade
atan2f u35 44b187851195d24bab2561eb8f4ff5d0
sinhf u10 45bc228a14c3e39eeb35e9764394a23e
coshf u10 838d441e85d415ef4fb1e5c5ea966a71
tanhf u10 d19f254d41e8726c748df87b95bc9acd
657 changes: 367 additions & 290 deletions src/libm/sleefdp.c

Large diffs are not rendered by default.

1,198 changes: 478 additions & 720 deletions src/libm/sleefsimddp.c

Large diffs are not rendered by default.

82 changes: 60 additions & 22 deletions src/libm/sleefsimdsp.c
Original file line number Diff line number Diff line change
@@ -214,6 +214,16 @@ extern const float rempitabsp[];

//

// Polynomial evaluation helpers (vector, single precision).
//
// POLYn evaluates an n-coefficient polynomial in x. Coefficients are listed
// from the highest power down to the constant term c0. The caller supplies
// the precomputed powers x2 (= x*x) and x4 (= x2*x2); each macro splits the
// polynomial into a low and a high part and joins them with one multiply-add
// (Estrin-style evaluation).
//
// NOTE(review): assumes vmla_vf_vf_vf_vf(a, b, c) yields a*b + c and that
// vcast_vf_f turns a scalar constant into a vfloat -- both are defined
// outside this view; confirm against the helper headers.
// Arguments are expanded more than once, so pass side-effect-free expressions.

// c1*x + c0
#define POLY2(x, c1, c0) \
  vmla_vf_vf_vf_vf(x, vcast_vf_f(c1), vcast_vf_f(c0))

// c2*x2 + (c1*x + c0)
#define POLY3(x, x2, c2, c1, c0) \
  vmla_vf_vf_vf_vf(x2, vcast_vf_f(c2), \
                   POLY2(x, c1, c0))

// (c3*x + c2)*x2 + (c1*x + c0)
#define POLY4(x, x2, c3, c2, c1, c0) \
  vmla_vf_vf_vf_vf(x2, \
                   POLY2(x, c3, c2), \
                   POLY2(x, c1, c0))

// c4*x4 + POLY4(low four coefficients)
#define POLY5(x, x2, x4, c4, c3, c2, c1, c0) \
  vmla_vf_vf_vf_vf(x4, vcast_vf_f(c4), \
                   POLY4(x, x2, c3, c2, c1, c0))

// POLY2(high two)*x4 + POLY4(low four)
#define POLY6(x, x2, x4, c5, c4, c3, c2, c1, c0) \
  vmla_vf_vf_vf_vf(x4, \
                   POLY2(x, c5, c4), \
                   POLY4(x, x2, c3, c2, c1, c0))

// POLY3(high three)*x4 + POLY4(low four)
#define POLY7(x, x2, x4, c6, c5, c4, c3, c2, c1, c0) \
  vmla_vf_vf_vf_vf(x4, \
                   POLY3(x, x2, c6, c5, c4), \
                   POLY4(x, x2, c3, c2, c1, c0))

// POLY4(high four)*x4 + POLY4(low four)
#define POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0) \
  vmla_vf_vf_vf_vf(x4, \
                   POLY4(x, x2, c7, c6, c5, c4), \
                   POLY4(x, x2, c3, c2, c1, c0))

//

#include "df.h"

static INLINE CONST VECTOR_CC vopmask visnegzero_vo_vf(vfloat d) {
@@ -638,12 +648,23 @@ EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) {
o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1));
x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x)));

#if defined(ENABLE_NEON32)
u = vcast_vf_f(0.00927245803177356719970703f);
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f));
#else
vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2);
u = POLY6(s, s2, s4,
0.00927245803177356719970703f,
0.00331984995864331722259521f,
0.0242998078465461730957031f,
0.0534495301544666290283203f,
0.133383005857467651367188f,
0.333331853151321411132812f);
#endif

u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x);

@@ -691,12 +712,23 @@ EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) {
o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1));
x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x)));

#if defined(ENABLE_NEON32)
u = vcast_vf_f(0.00927245803177356719970703f);
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f));
#else
vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2);
u = POLY6(s, s2, s4,
0.00927245803177356719970703f,
0.00331984995864331722259521f,
0.0242998078465461730957031f,
0.0534495301544666290283203f,
0.133383005857467651367188f,
0.333331853151321411132812f);
#endif

u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x);

@@ -1438,14 +1470,16 @@ EXPORT CONST VECTOR_CC vfloat xatanf(vfloat d) {

t = vmul_vf_vf_vf(s, s);

u = vcast_vf_f(0.00282363896258175373077393f);
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0159569028764963150024414f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.0425049886107444763183594f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0748900920152664184570312f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.106347933411598205566406f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.142027363181114196777344f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.199926957488059997558594f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.333331018686294555664062f));
vfloat t2 = vmul_vf_vf_vf(t, t), t4 = vmul_vf_vf_vf(t2, t2);
u = POLY8(t, t2, t4,
0.00282363896258175373077393f,
-0.0159569028764963150024414f,
0.0425049886107444763183594f,
-0.0748900920152664184570312f,
0.106347933411598205566406f,
-0.142027363181114196777344f,
0.199926957488059997558594f,
-0.333331018686294555664062f);

t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s);

@@ -1477,14 +1511,16 @@ static INLINE CONST VECTOR_CC vfloat atan2kf(vfloat y, vfloat x) {
s = vdiv_vf_vf_vf(s, t);
t = vmul_vf_vf_vf(s, s);

u = vcast_vf_f(0.00282363896258175373077393f);
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0159569028764963150024414f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.0425049886107444763183594f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0748900920152664184570312f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.106347933411598205566406f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.142027363181114196777344f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.199926957488059997558594f));
u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.333331018686294555664062f));
vfloat t2 = vmul_vf_vf_vf(t, t), t4 = vmul_vf_vf_vf(t2, t2);
u = POLY8(t, t2, t4,
0.00282363896258175373077393f,
-0.0159569028764963150024414f,
0.0425049886107444763183594f,
-0.0748900920152664184570312f,
0.106347933411598205566406f,
-0.142027363181114196777344f,
0.199926957488059997558594f,
-0.333331018686294555664062f);

t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s);
t = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f((float)(M_PI/2)), t);
@@ -1734,12 +1770,14 @@ static INLINE CONST VECTOR_CC vfloat expm1fk(vfloat d) {
s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);

u = vcast_vf_f(0.000198527617612853646278381);
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219));
u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5));
vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2);
u = POLY6(s, s2, s4,
0.000198527617612853646278381,
0.00139304355252534151077271,
0.00833336077630519866943359,
0.0416664853692054748535156,
0.166666671633720397949219,
0.5);

u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s);

73 changes: 45 additions & 28 deletions src/libm/sleefsp.c
Original file line number Diff line number Diff line change
@@ -30,6 +30,14 @@ extern const float rempitabsp[];
#pragma fp_contract (off)
#endif

// Polynomial evaluation helpers (scalar, single precision).
//
// POLYn evaluates an n-coefficient polynomial in x. Coefficients are listed
// from the highest power down to the constant term c0. The caller supplies
// the precomputed powers x2 (= x*x) and x4 (= x2*x2); each macro splits the
// polynomial into a low and a high part and joins them with one mlaf
// (Estrin-style evaluation).
//
// NOTE(review): assumes mlaf(a, b, c) yields a*b + c -- it is defined
// outside this view; confirm against its definition.
// Arguments are expanded more than once, so pass side-effect-free expressions.

// c1*x + c0
#define POLY2(x, c1, c0) \
  mlaf(x, c1, c0)

// c2*x2 + (c1*x + c0)
#define POLY3(x, x2, c2, c1, c0) \
  mlaf(x2, c2, POLY2(x, c1, c0))

// (c3*x + c2)*x2 + (c1*x + c0)
#define POLY4(x, x2, c3, c2, c1, c0) \
  mlaf(x2, \
       POLY2(x, c3, c2), \
       POLY2(x, c1, c0))

// c4*x4 + POLY4(low four coefficients)
#define POLY5(x, x2, x4, c4, c3, c2, c1, c0) \
  mlaf(x4, c4, \
       POLY4(x, x2, c3, c2, c1, c0))

// POLY2(high two)*x4 + POLY4(low four)
#define POLY6(x, x2, x4, c5, c4, c3, c2, c1, c0) \
  mlaf(x4, \
       POLY2(x, c5, c4), \
       POLY4(x, x2, c3, c2, c1, c0))

// POLY3(high three)*x4 + POLY4(low four)
#define POLY7(x, x2, x4, c6, c5, c4, c3, c2, c1, c0) \
  mlaf(x4, \
       POLY3(x, x2, c6, c5, c4), \
       POLY4(x, x2, c3, c2, c1, c0))

// POLY4(high four)*x4 + POLY4(low four)
#define POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0) \
  mlaf(x4, \
       POLY4(x, x2, c7, c6, c5, c4), \
       POLY4(x, x2, c3, c2, c1, c0))

static INLINE CONST int32_t floatToRawIntBits(float d) {
union {
float f;
@@ -828,12 +836,14 @@ EXPORT CONST float xtanf(float d) {

if ((q & 1) != 0) x = -x;

u = 0.00927245803177356719970703f;
u = mlaf(u, s, 0.00331984995864331722259521f);
u = mlaf(u, s, 0.0242998078465461730957031f);
u = mlaf(u, s, 0.0534495301544666290283203f);
u = mlaf(u, s, 0.133383005857467651367188f);
u = mlaf(u, s, 0.333331853151321411132812f);
float s2 = s * s, s4 = s2 * s2;
u = POLY6(s, s2, s4,
0.00927245803177356719970703f,
0.00331984995864331722259521f,
0.0242998078465461730957031f,
0.0534495301544666290283203f,
0.133383005857467651367188f,
0.333331853151321411132812f);

u = mlaf(s, u * x, x);

@@ -893,14 +903,16 @@ EXPORT CONST float xatanf(float s) {

t = s * s;

u = 0.00282363896258175373077393f;
u = mlaf(u, t, -0.0159569028764963150024414f);
u = mlaf(u, t, 0.0425049886107444763183594f);
u = mlaf(u, t, -0.0748900920152664184570312f);
u = mlaf(u, t, 0.106347933411598205566406f);
u = mlaf(u, t, -0.142027363181114196777344f);
u = mlaf(u, t, 0.199926957488059997558594f);
u = mlaf(u, t, -0.333331018686294555664062f);
float t2 = t * t, t4 = t2 * t2;
u = POLY8(t, t2, t4,
0.00282363896258175373077393f,
-0.0159569028764963150024414f,
0.0425049886107444763183594f,
-0.0748900920152664184570312f,
0.106347933411598205566406f,
-0.142027363181114196777344f,
0.199926957488059997558594f,
-0.333331018686294555664062f);

t = s + s * (t * u);

@@ -920,14 +932,16 @@ static INLINE CONST float atan2kf(float y, float x) {
s = y / x;
t = s * s;

u = 0.00282363896258175373077393f;
u = mlaf(u, t, -0.0159569028764963150024414f);
u = mlaf(u, t, 0.0425049886107444763183594f);
u = mlaf(u, t, -0.0748900920152664184570312f);
u = mlaf(u, t, 0.106347933411598205566406f);
u = mlaf(u, t, -0.142027363181114196777344f);
u = mlaf(u, t, 0.199926957488059997558594f);
u = mlaf(u, t, -0.333331018686294555664062f);
float t2 = t * t, t4 = t2 * t2;
u = POLY8(t, t2, t4,
0.00282363896258175373077393f,
-0.0159569028764963150024414f,
0.0425049886107444763183594f,
-0.0748900920152664184570312f,
0.106347933411598205566406f,
-0.142027363181114196777344f,
0.199926957488059997558594f,
-0.333331018686294555664062f);

t = u * t * s + s;
t = q * (float)(M_PI/2) + t;
@@ -1163,12 +1177,15 @@ static INLINE CONST float expm1kf(float d) {
s = mlaf(q, -L2Uf, d);
s = mlaf(q, -L2Lf, s);

u = 0.000198527617612853646278381;
u = mlaf(u, s, 0.00139304355252534151077271);
u = mlaf(u, s, 0.00833336077630519866943359);
u = mlaf(u, s, 0.0416664853692054748535156);
u = mlaf(u, s, 0.166666671633720397949219);
u = mlaf(u, s, 0.5);
float s2 = s * s, s4 = s2 * s2;
u = POLY6(s, s2, s4,
0.000198527617612853646278381,
0.00139304355252534151077271,
0.00833336077630519866943359,
0.0416664853692054748535156,
0.166666671633720397949219,
0.5);

u = s * s * u + s;

if (q != 0) u = ldexp2kf(u + 1, q) - 1;