-
Notifications
You must be signed in to change notification settings - Fork 120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support changing OPENSSL_armcap with environment variable on Apple 64-bit ARM systems #1045
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
82cee09
use handle_cpu_env from cpu_aarch64_linux.c to cpu_aarch64_apple.c
billbo-yang e9821dd
always use OPENSSL_armcap() when detecting hardware capabilities on arm
billbo-yang d9acd27
refactor duplicated into cpu_aarch64.h/c
billbo-yang 46be6b7
add ifdefs to cpu_aarch64.h/c
billbo-yang cf0a7d3
Update comments
billbo-yang 12dd7bf
adjust line length of comment
03ba9ca
set arm capabilities functions to fillow format of x86 ones
92dcae9
make rest of the arm capability functions in line with x86 ones
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 OR ISC | ||
|
||
#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP) | ||
|
||
#include "cpu_aarch64.h" | ||
|
||
void handle_cpu_env(uint32_t *out, const char *in) { | ||
const int invert = in[0] == '~'; | ||
const int or = in[0] == '|'; | ||
const int skip_first_byte = invert || or; | ||
const int hex = in[skip_first_byte] == '0' && in[skip_first_byte+1] == 'x'; | ||
uint32_t armcap = out[0]; | ||
|
||
int sscanf_result; | ||
uint32_t v; | ||
if (hex) { | ||
sscanf_result = sscanf(in + skip_first_byte + 2, "%" PRIx32, &v); | ||
} else { | ||
sscanf_result = sscanf(in + skip_first_byte, "%" PRIu32, &v); | ||
} | ||
|
||
if (!sscanf_result) { | ||
return; | ||
} | ||
|
||
// Detect if the user is trying to use the environment variable to set | ||
// a capability that is _not_ available on the CPU: | ||
// If the runtime capability check (e.g via getauxval() on Linux) | ||
// returned a non-zero hwcap in `armcap` (out) | ||
// and a bit set in the requested `v` is not set in `armcap`, | ||
// abort instead of crashing later. | ||
// The case of invert cannot enable an unexisting capability; | ||
// it can only disable an existing one. | ||
if (!invert && armcap && (~armcap & v)) | ||
{ | ||
fprintf(stderr, | ||
"Fatal Error: HW capability found: 0x%02X, but HW capability requested: 0x%02X.\n", | ||
armcap, v); | ||
exit(1); | ||
} | ||
|
||
if (invert) { | ||
out[0] &= ~v; | ||
} else if (or) { | ||
out[0] |= v; | ||
} else { | ||
out[0] = v; | ||
} | ||
} | ||
|
||
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 OR ISC | ||
|
||
#ifndef OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H | ||
#define OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H | ||
|
||
#if defined(__cplusplus) | ||
extern "C" { | ||
#endif | ||
|
||
#include <inttypes.h> | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP) | ||
|
||
// cpu_aarch64 contains common functions used across multiple cpu_aarch64_* files | ||
|
||
// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|. | ||
// See the comment in |OPENSSL_cpuid_setup| about this. | ||
void handle_cpu_env(uint32_t *out, const char *in); | ||
|
||
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP | ||
|
||
#if defined(__cplusplus) | ||
} | ||
#endif | ||
|
||
#endif // OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -167,52 +167,26 @@ extern uint32_t OPENSSL_armcap_P; | |
// |CRYPTO_is_ARMv8_AES_capable| and |CRYPTO_is_ARMv8_PMULL_capable| | ||
// for checking the support for AES and PMULL instructions, respectively. | ||
OPENSSL_INLINE int CRYPTO_is_NEON_capable(void) { | ||
#if defined(OPENSSL_STATIC_ARMCAP_NEON) || defined(__ARM_NEON) | ||
return 1; | ||
#elif defined(OPENSSL_STATIC_ARMCAP) | ||
return 0; | ||
#else | ||
return (OPENSSL_armcap_P & ARMV7_NEON) != 0; | ||
#endif | ||
} | ||
|
||
OPENSSL_INLINE int CRYPTO_is_ARMv8_AES_capable(void) { | ||
#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_AES) | ||
return 1; | ||
#elif defined(OPENSSL_STATIC_ARMCAP) | ||
return 0; | ||
#else | ||
return (OPENSSL_armcap_P & ARMV8_AES) != 0; | ||
#endif | ||
} | ||
|
||
OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) { | ||
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_AES) | ||
return 1; | ||
#elif defined(OPENSSL_STATIC_ARMCAP) | ||
return 0; | ||
#else | ||
return (OPENSSL_armcap_P & ARMV8_PMULL) != 0; | ||
#endif | ||
} | ||
|
||
OPENSSL_INLINE int CRYPTO_is_ARMv8_GCM_8x_capable(void) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about this and the next function? Should we remove the |
||
#if defined(OPENSSL_STATIC_ARMCAP) | ||
return 0; | ||
#else | ||
return ((OPENSSL_armcap_P & ARMV8_SHA3) != 0 && | ||
((OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 || | ||
(OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0)); | ||
#endif | ||
} | ||
|
||
OPENSSL_INLINE int CRYPTO_is_ARMv8_wide_multiplier_capable(void) { | ||
#if defined(OPENSSL_STATIC_ARMCAP) | ||
return 0; | ||
#else | ||
return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 || | ||
(OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0; | ||
#endif | ||
} | ||
|
||
#endif // OPENSSL_ARM || OPENSSL_AARCH64 | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why were the second parts of the checks removed? Aren't these possible compiler flags that can set the capability statically?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to ACLE,
__ARM_NEON
is always set to 1 for AARCH64 platforms. As such, if we wantOPENSSL_armcap
to determine use of the C implementation, we can't check for this flag since it will always be set. We could use the-march
flag when compiling to disable NEON, but sinceOPENSSL_armcap
is supposed to determine processor capability at runtime I decided on this approach to reduce confusion.cpu_aarch64_apple.c
(as well as the counterpart files for other CPUs) still setOPENSSL_armcap
to be the correct value for the processor inOPENSSL_cpuid_setup()
, so removing the check doesn't disable NEON entirely, either.This applies to
__ARM_FEATURE_AES
as well.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Couldn't these values be used on ARM processors (32-bit)? We have users who build AWS-LC on them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cpucap.c detects if LC is built on 32-bit ARM and sets
OPENSSL_armcap
accordingly, so when we call a function likeCRYPTO_is_NEON_capable()
, even without a check for these values, it will correctly detect the CPU capabilities.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may be missing something, but I think if
OPENSSL_STATIC_ARMCAP
is defined but notOPENSSL_STATIC_ARMCAP_NEON
due to an old compiler that defines__ARM_NEON
, it will not reach the check forOPENSSL_armcap
and will return 0.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed the functions to no longer include the
ifdef
statements, similar to how the capability functions for x86 processors in the same file do it. SinceOPENSSL_armcap_P
is set correctly even whenOPENSSL_STATIC_ARMCAP
is defined, the functions should still return the right result.Either way, a
BORINGSSL_DISPATCH_TEST
to ensure that this doesn't break anything will be introduced in a separate PR (which we should merge before this one)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we'd also need to measure the performance in FIPS build with and without the ifdefs for static build (possibly for dynamic as well) when the STATIC_ARMCAPs are defined. Fetching the OPENSSL_armcap_P variable had some implications on the static FIPS build in x86 #856.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Performed a benchmark on both the dynamic and static builds comparing performance of AES on main to the PR branch:
OPENSSL_STATIC_ARMCAP
and other static macros setNo performance differences between main & PR for either the dynamic nor static builds.
Full results can be seen here:
aws-lc-dynamic-staticarmcap-aes_main_vs_pr1045_bm.csv
aws-lc-static-staticarmcap-aes_main_vs_pr1045_bm.csv