Skip to content

Commit

Permalink
cpu.rs: Refactor and Fix ARM/Aarch64 CPU features handling.
Browse files Browse the repository at this point in the history
Presently on aarch64-apple-* `GFp_armcap_P` is always zero. That's wrong; the
assembly language code needs it to be set correctly, or else the most optimized
code paths (NEON and/or SHA-2 extensions) will never be chosen.

Refactor the code so that `GFp_armcap_P` is set correctly, and to make it easier to
understand and maintain.

This will enable more optimized implementations on aarch64-apple-* targets, whereas
before the lowest common denominator implementations were being used for any
features that did the feature detection in assembly language code instead of Rust.

Move the definition of `GFp_armcap_P` to Rust so wouldn't have to keep C and Rust
code for it in sync. Remove the fallback definitions of `GFp_armcap_P` that use the
".comm"; they would always be set to zero if they were ever used, which wouldn't
(necessarily) match the static feature set. Removing them makes it clearer that
those definitions aren't used.
  • Loading branch information
briansmith committed Nov 18, 2020
1 parent 49065e8 commit 0f7a0c0
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 90 deletions.
1 change: 0 additions & 1 deletion crypto/chacha/asm/chacha-armv4.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,6 @@ sub NEONROUND {
add sp,sp,#4*(16+3)
ldmia sp!,{r4-r11,pc}
.size ChaCha20_neon,.-ChaCha20_neon
.comm GFp_armcap_P,4,4
#endif
___
}}}
Expand Down
6 changes: 0 additions & 6 deletions crypto/crypto.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,4 @@
// initialising it to zero, it becomes a "data symbol", which isn't so
// affected.
HIDDEN uint32_t GFp_ia32cap_P[4] = {0};
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)

#include <GFp/arm_arch.h>

HIDDEN uint32_t GFp_armcap_P = 0;

#endif
5 changes: 0 additions & 5 deletions crypto/fipsmodule/bn/asm/armv4-mont.pl
Original file line number Diff line number Diff line change
Expand Up @@ -744,11 +744,6 @@
}
$code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7
.comm GFp_armcap_P,4,4
.hidden GFp_armcap_P
#endif
___

foreach (split("\n",$code)) {
Expand Down
5 changes: 0 additions & 5 deletions crypto/fipsmodule/sha/asm/sha256-armv4.pl
Original file line number Diff line number Diff line change
Expand Up @@ -687,11 +687,6 @@ ()
}}}
$code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm GFp_armcap_P,4,4
.hidden GFp_armcap_P
#endif
___

open SELF,$0;
Expand Down
5 changes: 0 additions & 5 deletions crypto/fipsmodule/sha/asm/sha512-armv4.pl
Original file line number Diff line number Diff line change
Expand Up @@ -651,11 +651,6 @@ ()
}
$code.=<<___;
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm GFp_armcap_P,4,4
.hidden GFp_armcap_P
#endif
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
Expand Down
7 changes: 0 additions & 7 deletions crypto/fipsmodule/sha/asm/sha512-armv8.pl
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,6 @@ sub BODY_00_xx {
___
}

$code.=<<___;
#ifndef __KERNEL__
.comm GFp_armcap_P,4,4
.hidden GFp_armcap_P
#endif
___

{ my %opcode = (
"sha256h" => 0x5e004000, "sha256h2" => 0x5e005000,
"sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 );
Expand Down
172 changes: 111 additions & 61 deletions src/cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ pub(crate) mod arm {
features |= PMULL.mask;
}
if caps & HWCAP_SHA2 == HWCAP_SHA2 {
features |= 1 << 4;
features |= SHA256.mask;
}

unsafe { GFp_armcap_P = features };
Expand Down Expand Up @@ -163,86 +163,136 @@ pub(crate) mod arm {
}
}

#[cfg(not(target_arch = "wasm32"))]
macro_rules! features {
{
$(
$name:ident {
mask: $mask:expr,

/// Should we assume that the feature is always available
/// for aarch64-apple-ios targets? The first AArch64 iOS
/// device used the Apple A7 chip.
// TODO: When we can use `if` in const expressions:
// ```
// aarch64_ios: $aarch64_ios,
// ```
aarch64_ios: true,
}
),+
, // trailing comma is required.
} => {
$(
#[allow(dead_code)]
pub(crate) const $name: Feature = Feature {
mask: $mask,
};
)+

// TODO: When we can use `if` in const expressions, do this:
// ```
// const ARMCAP_STATIC: u32 = 0
// $(
// | ( if $aarch64_ios &&
// cfg!(all(target_arch = "aarch64",
// target_os = "ios")) {
// $name.mask
// } else {
// 0
// }
// )
// )+;
// ```
//
// TODO: Add static feature detection to other targets.
// TODO: Combine static feature detection with runtime feature
// detection.
#[cfg(all(target_arch = "aarch64", target_os = "ios"))]
const ARMCAP_STATIC: u32 = 0
$( | $name.mask
)+;
#[cfg(not(all(target_arch = "aarch64", target_os = "ios")))]
const ARMCAP_STATIC: u32 = 0;

#[cfg(all(target_arch = "aarch64", target_os = "ios"))]
#[test]
fn test_armcap_static_available() {
let features = crate::cpu::features();
$(
assert!($name.available(features));
)+
}
}
}

#[allow(dead_code)]
pub(crate) struct Feature {
#[cfg_attr(
any(
target_os = "ios",
not(any(target_arch = "arm", target_arch = "aarch64"))
),
allow(dead_code)
)]
mask: u32,

#[cfg_attr(not(target_os = "ios"), allow(dead_code))]
ios: bool,
}

#[cfg(not(target_arch = "wasm32"))]
impl Feature {
#[allow(clippy::needless_return)]
#[allow(dead_code)]
#[inline(always)]
pub fn available(&self, _: super::Features) -> bool {
#[cfg(all(target_os = "ios", any(target_arch = "arm", target_arch = "aarch64")))]
{
return self.ios;
if self.mask == self.mask & ARMCAP_STATIC {
return true;
}

#[cfg(all(
any(target_os = "android", target_os = "fuchsia", target_os = "linux"),
any(target_arch = "arm", target_arch = "aarch64")
))]
{
return self.mask == self.mask & unsafe { GFp_armcap_P };
if self.mask == self.mask & unsafe { GFp_armcap_P } {
return true;
}
}

#[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
{
return false;
}
false
}
}

// Keep in sync with `ARMV7_NEON`.
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm"),
not(target_os = "ios")
))]
pub(crate) const NEON: Feature = Feature {
mask: 1 << 0,
ios: true,
};

// Keep in sync with `ARMV8_AES`.
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "x86",
target_arch = "x86_64"
))]
pub(crate) const AES: Feature = Feature {
mask: 1 << 2,
ios: true,
};

// Keep in sync with `ARMV8_PMULL`.
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "x86",
target_arch = "x86_64"
))]
pub(crate) const PMULL: Feature = Feature {
mask: 1 << 5,
ios: true,
};
features! {
// Keep in sync with `ARMV7_NEON`.
NEON {
mask: 1 << 0,
aarch64_ios: true,
},

// Keep in sync with `ARMV8_AES`.
AES {
mask: 1 << 2,
aarch64_ios: true,
},

// Keep in sync with `ARMV8_SHA256`.
SHA256 {
mask: 1 << 4,
aarch64_ios: true,
},

// Keep in sync with `ARMV8_PMULL`.
PMULL {
mask: 1 << 5,
aarch64_ios: true,
},
}

#[cfg(all(
any(target_os = "android", target_os = "fuchsia", target_os = "linux"),
any(target_arch = "arm", target_arch = "aarch64")
))]
extern "C" {
static mut GFp_armcap_P: u32;
// Some non-Rust code still checks this even when it is statically known
// the given feature is available, so we have to ensure that this is
// initialized properly. Keep this in sync with the initialization in
// BoringSSL's crypto.c.
//
// TODO: This should have "hidden" visibility but we don't have a way of
// controlling that yet: https://github.com/rust-lang/rust/issues/73958.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
#[no_mangle]
static mut GFp_armcap_P: u32 = ARMCAP_STATIC;

#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), target_os = "ios"))]
#[test]
fn test_armcap_static_matches_armcap_dynamic() {
assert_eq!(ARMCAP_STATIC, 1 | 4 | 16 | 32);
assert_eq!(ARMCAP_STATIC, unsafe { GFp_armcap_P });
}
}

Expand Down

0 comments on commit 0f7a0c0

Please sign in to comment.