auto merge of #5975 : huonw/rust/rustc-intrinsics-fixed-stack, r=pcwa…

…lton This implements the fixed_stack_segment for items with the rust-intrinsic abi, and then uses it to make f32 and f64 use intrinsics where appropriate, but without overflowing stacks and killing canaries (cf. #5686 and #5697). Hopefully. @pcwalton, the fixed_stack_segment implementation involved mirroring its implementation in `base.rs` in `trans_closure`, but without adding the `set_no_inline` (reasoning: that would defeat the purpose of intrinsics), which is possibly incorrect. I'm a little hazy about how the underlying structure works, so I've annotated the 4 that have caused problems so far, but there's no guarantee that the other intrinsics are entirely well-behaved. Anyway, it has good results (the following are just summing the result of each function for 1 up to 100 million): ``` $ ./intrinsics-perf.sh f32 func new old speedup sin 0.80 2.75 3.44 cos 0.80 2.76 3.45 sqrt 0.56 2.73 4.88 ln 1.01 2.94 2.91 log10 0.97 2.90 2.99 log2 1.01 2.95 2.92 exp 0.90 2.85 3.17 exp2 0.92 2.87 3.12 pow 6.95 8.57 1.23 geometric mean: 2.97 $ ./intrinsics-perf.sh f64 func new old speedup sin 12.08 14.06 1.16 cos 12.04 13.67 1.14 sqrt 0.49 2.73 5.57 ln 4.11 5.59 1.36 log10 5.09 6.54 1.28 log2 2.78 5.10 1.83 exp 2.00 3.97 1.99 exp2 1.71 3.71 2.17 pow 5.90 7.51 1.27 geometric mean: 1.72 ``` So about 3x faster on average for f32, and 1.7x for f64. This isn't exactly apples to apples though, since this patch also adds #[inline(always)] to all the function definitions too, which possibly gives a speedup. (fwiw, GitHub is showing 93c0888 after d9c54f8 (since I cherry-picked the latter from #5697), but git's order is the other way.)
rust-lang · Apr 20, 2013 · ae3b869 · ae3b869
2 parents 2b09267 + c5baeb1
commit ae3b869
Show file tree

Hide file tree

Showing 8 changed files with 188 additions and 162 deletions.
diff --git a/src/libcore/num/f32.rs b/src/libcore/num/f32.rs
@@ -10,12 +10,9 @@
 
 //! Operations and constants for `f32`
 
-use cmath;
-use libc::{c_float, c_int};
 use num::strconv;
 use num;
 use option::Option;
-use unstable::intrinsics::floorf32;
 use from_str;
 use to_str;
 
@@ -24,79 +21,93 @@ use to_str;
 
 pub use cmath::c_float_targ_consts::*;
 
+// An inner module is required to get the #[inline(always)] attribute on the
+// functions.
+pub use self::delegated::*;
+
 macro_rules! delegate(
     (
-        fn $name:ident(
-            $(
-                $arg:ident : $arg_ty:ty
-            ),*
-        ) -> $rv:ty = $bound_name:path
+        $(
+            fn $name:ident(
+                $(
+                    $arg:ident : $arg_ty:ty
+                ),*
+            ) -> $rv:ty = $bound_name:path
+        ),*
     ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
-            unsafe {
-                $bound_name($( $arg ),*)
-            }
+        mod delegated {
+            use cmath::c_float_utils;
+            use libc::{c_float, c_int};
+            use unstable::intrinsics;
+
+            $(
+                #[inline(always)]
+                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+                    unsafe {
+                        $bound_name($( $arg ),*)
+                    }
+                }
+            )*
         }
     )
 )
 
-delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
-delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
-delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
-delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::atan2)
-delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
-delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
-delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::copysign)
-delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
-delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
-delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
-delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
-delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
-delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
-delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
-delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
-delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::abs_sub)
-delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
-    cmath::c_float_utils::mul_add)
-delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::fmax)
-delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::fmin)
-delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::nextafter)
-delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
-    cmath::c_float_utils::frexp)
-delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::hypot)
-delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
-    cmath::c_float_utils::ldexp)
-delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
-    cmath::c_float_utils::lgamma)
-delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
-delegate!(fn log_radix(n: c_float) -> c_float =
-    cmath::c_float_utils::log_radix)
-delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
-delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
-delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
-delegate!(fn ilog_radix(n: c_float) -> c_int =
-    cmath::c_float_utils::ilog_radix)
-delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
-    cmath::c_float_utils::modf)
-delegate!(fn pow(n: c_float, e: c_float) -> c_float =
-    cmath::c_float_utils::pow)
-delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
-delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
-    cmath::c_float_utils::ldexp_radix)
-delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
-delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
-delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
-delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
-delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
-delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
-delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
+delegate!(
+    // intrinsics
+    fn abs(n: f32) -> f32 = intrinsics::fabsf32,
+    fn cos(n: f32) -> f32 = intrinsics::cosf32,
+    fn exp(n: f32) -> f32 = intrinsics::expf32,
+    fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
+    fn floor(x: f32) -> f32 = intrinsics::floorf32,
+    fn ln(n: f32) -> f32 = intrinsics::logf32,
+    fn log10(n: f32) -> f32 = intrinsics::log10f32,
+    fn log2(n: f32) -> f32 = intrinsics::log2f32,
+    fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
+    fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
+    fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
+    fn sin(n: f32) -> f32 = intrinsics::sinf32,
+    fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
+
+    // LLVM 3.3 required to use intrinsics for these four
+    fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
+    fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
+    /*
+    fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
+    fn trunc(n: f32) -> f32 = intrinsics::truncf32,
+    fn rint(n: f32) -> f32 = intrinsics::rintf32,
+    fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
+    */
+
+    // cmath
+    fn acos(n: c_float) -> c_float = c_float_utils::acos,
+    fn asin(n: c_float) -> c_float = c_float_utils::asin,
+    fn atan(n: c_float) -> c_float = c_float_utils::atan,
+    fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
+    fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
+    fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
+    fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
+    fn erf(n: c_float) -> c_float = c_float_utils::erf,
+    fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
+    fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
+    fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
+    fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
+    fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
+    fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
+    fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
+    fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
+    fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
+    fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
+    fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
+    fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
+    fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
+    fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
+    fn round(n: c_float) -> c_float = c_float_utils::round,
+    fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
+    fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
+    fn tan(n: c_float) -> c_float = c_float_utils::tan,
+    fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
+    fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
+
 
 // These are not defined inside consts:: for consistency with
 // the integer types
@@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
 #[inline(always)]
 pub fn gt(x: f32, y: f32) -> bool { return x > y; }
 
-/// Returns `x` rounded down
-#[inline(always)]
-pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
 
 // FIXME (#1999): replace the predicates below with llvm intrinsics or
 // calls to the libmath macros in the rust runtime for performance.

diff --git a/src/libcore/num/f64.rs b/src/libcore/num/f64.rs
@@ -10,12 +10,9 @@
 
 //! Operations and constants for `f64`
 
-use cmath;
-use libc::{c_double, c_int};
 use num::strconv;
 use num;
 use option::Option;
-use unstable::intrinsics::floorf64;
 use to_str;
 use from_str;
 
@@ -25,87 +22,98 @@ use from_str;
 pub use cmath::c_double_targ_consts::*;
 pub use cmp::{min, max};
 
+// An inner module is required to get the #[inline(always)] attribute on the
+// functions.
+pub use self::delegated::*;
+
 macro_rules! delegate(
     (
-        fn $name:ident(
-            $(
-                $arg:ident : $arg_ty:ty
-            ),*
-        ) -> $rv:ty = $bound_name:path
+        $(
+            fn $name:ident(
+                $(
+                    $arg:ident : $arg_ty:ty
+                ),*
+            ) -> $rv:ty = $bound_name:path
+        ),*
     ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
-            unsafe {
-                $bound_name($( $arg ),*)
-            }
+        mod delegated {
+            use cmath::c_double_utils;
+            use libc::{c_double, c_int};
+            use unstable::intrinsics;
+
+            $(
+                #[inline(always)]
+                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+                    unsafe {
+                        $bound_name($( $arg ),*)
+                    }
+                }
+            )*
         }
     )
 )
 
-delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
-delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
-delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
-delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::atan2)
-delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
-delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
-delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::copysign)
-delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
-delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
-delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
-delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
-delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
-delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
-delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
-delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
-delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::abs_sub)
-delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
-    cmath::c_double_utils::mul_add)
-delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::fmax)
-delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::fmin)
-delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::nextafter)
-delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
-    cmath::c_double_utils::frexp)
-delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::hypot)
-delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
-    cmath::c_double_utils::ldexp)
-delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
-    cmath::c_double_utils::lgamma)
-delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
-delegate!(fn log_radix(n: c_double) -> c_double =
-    cmath::c_double_utils::log_radix)
-delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
-delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
-delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
-delegate!(fn ilog_radix(n: c_double) -> c_int =
-    cmath::c_double_utils::ilog_radix)
-delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
-    cmath::c_double_utils::modf)
-delegate!(fn pow(n: c_double, e: c_double) -> c_double =
-    cmath::c_double_utils::pow)
-delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
-delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
-    cmath::c_double_utils::ldexp_radix)
-delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
-delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
-delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
-delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
-delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
-delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
-delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
-delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
-delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
-delegate!(fn jn(i: c_int, n: c_double) -> c_double =
-    cmath::c_double_utils::jn)
-delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
-delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
-delegate!(fn yn(i: c_int, n: c_double) -> c_double =
-    cmath::c_double_utils::yn)
+delegate!(
+    // intrinsics
+    fn abs(n: f64) -> f64 = intrinsics::fabsf64,
+    fn cos(n: f64) -> f64 = intrinsics::cosf64,
+    fn exp(n: f64) -> f64 = intrinsics::expf64,
+    fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
+    fn floor(x: f64) -> f64 = intrinsics::floorf64,
+    fn ln(n: f64) -> f64 = intrinsics::logf64,
+    fn log10(n: f64) -> f64 = intrinsics::log10f64,
+    fn log2(n: f64) -> f64 = intrinsics::log2f64,
+    fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
+    fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
+    fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
+    fn sin(n: f64) -> f64 = intrinsics::sinf64,
+    fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
+
+    // LLVM 3.3 required to use intrinsics for these four
+    fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
+    fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
+    /*
+    fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
+    fn trunc(n: f64) -> f64 = intrinsics::truncf64,
+    fn rint(n: c_double) -> c_double = intrinsics::rintf64,
+    fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
+    */
+
+    // cmath
+    fn acos(n: c_double) -> c_double = c_double_utils::acos,
+    fn asin(n: c_double) -> c_double = c_double_utils::asin,
+    fn atan(n: c_double) -> c_double = c_double_utils::atan,
+    fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
+    fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
+    fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
+    fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
+    fn erf(n: c_double) -> c_double = c_double_utils::erf,
+    fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
+    fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
+    fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
+    fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
+    fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
+    fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
+    fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
+    fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
+    fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
+    fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
+    fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
+    fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
+    fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
+    fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
+    fn round(n: c_double) -> c_double = c_double_utils::round,
+    fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
+    fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
+    fn tan(n: c_double) -> c_double = c_double_utils::tan,
+    fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
+    fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
+    fn j0(n: c_double) -> c_double = c_double_utils::j0,
+    fn j1(n: c_double) -> c_double = c_double_utils::j1,
+    fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
+    fn y0(n: c_double) -> c_double = c_double_utils::y0,
+    fn y1(n: c_double) -> c_double = c_double_utils::y1,
+    fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
 
 // FIXME (#1433): obtain these in a different way
 
@@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
     return !(is_NaN(x) || is_infinite(x));
 }
 
-/// Returns `x` rounded down
-#[inline(always)]
-pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
 
 // FIXME (#1999): add is_normal, is_subnormal, and fpclassify
 

diff --git a/src/libcore/num/float.rs b/src/libcore/num/float.rs
@@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
 pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
 pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
 pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
-pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc};
+pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
 pub use f64::signbit;
 pub use f64::{j0, j1, jn, y0, y1, yn};