Skip to content

Commit

Permalink
librustc: use LLVM intrinsics for several floating point operations.
Browse files Browse the repository at this point in the history
Achieves up to 5x speed up! However, the intrinsics seem to do bad
things to the stack, especially sin, cos and exp (rust-lang#5686 has
discussion).

Also, add f{32,64,loat}::powi, and reorganise the delegation code so
that functions have the #[inline(always)] annotation, and reduce the
repetition of delegate!(..).
  • Loading branch information
huonw committed Apr 3, 2013
1 parent 2d52c0a commit 05bb618
Show file tree
Hide file tree
Showing 2 changed files with 231 additions and 153 deletions.
158 changes: 86 additions & 72 deletions src/libcore/num/f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@

//! Operations and constants for `f32`
use cmath;
use libc::{c_float, c_int};
use num::NumCast;
use num::strconv;
use num;
use option::Option;
use unstable::intrinsics::floorf32;
use from_str;
use to_str;

Expand All @@ -25,79 +22,99 @@ use to_str;

pub use cmath::c_float_targ_consts::*;

// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;

// `delegate!` turns each `fn name(args) -> ret = path` entry into a
// `pub fn name(args) -> ret` whose body calls `path(args)` inside `unsafe`.
//
// NOTE(review): as shown here the matcher and the expansion each appear in
// two forms back to back: a single-entry form, then a repeated `$( ... ),*`
// form that emits `#[inline(always)]` functions inside `mod delegated`.
// This looks like before/after diff text rather than one definition;
// confirm against the actual file before relying on it.
macro_rules! delegate(
(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
$(
fn $name:ident(
$(
$arg:ident : $arg_ty:ty
),*
) -> $rv:ty = $bound_name:path
),*
) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
mod delegated {
// Names that the `$bound_name` paths and the argument/return types
// written at the call site resolve against.
use cmath::c_float_utils;
use libc::{c_float, c_int};
use unstable::intrinsics;

$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
}
)
)

// One `delegate!` invocation per wrapper; every entry in this run forwards
// to the same-named function in `cmath::c_float_utils`.
// NOTE(review): these names overlap with entries of the combined
// `delegate!( ... )` call that follows (e.g. `abs`, `cos`, `sqrt`), which
// suggests this run is the pre-change side of the diff; confirm.
delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::atan2)
delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::copysign)
delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::abs_sub)
delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
cmath::c_float_utils::mul_add)
delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::fmax)
delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
cmath::c_float_utils::fmin)
delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::nextafter)
delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
cmath::c_float_utils::frexp)
delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::hypot)
delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
cmath::c_float_utils::ldexp)
delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
cmath::c_float_utils::lgamma)
delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
delegate!(fn log_radix(n: c_float) -> c_float =
cmath::c_float_utils::log_radix)
delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
delegate!(fn ilog_radix(n: c_float) -> c_int =
cmath::c_float_utils::ilog_radix)
delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
cmath::c_float_utils::modf)
delegate!(fn pow(n: c_float, e: c_float) -> c_float =
cmath::c_float_utils::pow)
delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
cmath::c_float_utils::ldexp_radix)
delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
// Single `delegate!` invocation listing every forwarded function as a
// comma-separated `fn name(args) -> ret = path` entry. Entries are grouped
// by what they forward to: `intrinsics::*` or `c_float_utils::*`.
delegate!(
// Forwarded to intrinsics (`unstable::intrinsics`).
fn abs(n: f32) -> f32 = intrinsics::fabsf32,
fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
fn floor(x: f32) -> f32 = intrinsics::floorf32,
fn ln(n: f32) -> f32 = intrinsics::logf32,
fn log10(n: f32) -> f32 = intrinsics::log10f32,
fn log2(n: f32) -> f32 = intrinsics::log2f32,
fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,

// FIXME: using intrinsics for these kills the stack canary more easily
// than for those above (see discussion on #5686). For that reason `cos`,
// `sin` and `exp` keep forwarding to cmath, and the intrinsic-backed
// versions are exposed under separate `*_intr` names.
fn cos(n: c_float) -> c_float = c_float_utils::cos,
fn sin(n: c_float) -> c_float = c_float_utils::sin,
fn exp(n: f32) -> f32 = c_float_utils::exp,
fn cos_intr(n: f32) -> f32 = intrinsics::cosf32,
fn sin_intr(n: f32) -> f32 = intrinsics::sinf32,
fn exp_intr(n: f32) -> f32 = intrinsics::expf32,

// LLVM 3.3 required to use intrinsics for these four; until then `ceil`
// and `trunc` forward to cmath and the intrinsic entries below stay
// commented out.
fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
/*
fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
fn trunc(n: f32) -> f32 = intrinsics::truncf32,
fn rint(n: f32) -> f32 = intrinsics::rintf32,
fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
*/

// Forwarded to cmath (`cmath::c_float_utils`).
fn acos(n: c_float) -> c_float = c_float_utils::acos,
fn asin(n: c_float) -> c_float = c_float_utils::asin,
fn atan(n: c_float) -> c_float = c_float_utils::atan,
fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
fn erf(n: c_float) -> c_float = c_float_utils::erf,
fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
fn round(n: c_float) -> c_float = c_float_utils::round,
fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
fn tan(n: c_float) -> c_float = c_float_utils::tan,
fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)


// These are not defined inside consts:: for consistency with
// the integer types
Expand Down Expand Up @@ -144,9 +161,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
/// Returns `true` when `x` is strictly greater than `y`.
#[inline(always)]
pub fn gt(x: f32, y: f32) -> bool {
    x > y
}

/// Rounds `x` down by forwarding to the `floorf32` intrinsic.
#[inline(always)]
pub fn floor(x: f32) -> f32 {
    // The intrinsic is only callable from an `unsafe` context.
    unsafe {
        floorf32(x)
    }
}

// FIXME (#1999): replace the predicates below with llvm intrinsics or
// calls to the libmath macros in the rust runtime for performance.
Expand Down
Loading

0 comments on commit 05bb618

Please sign in to comment.