Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support changing OPENSSL_armcap with environment variable on Apple 64-bit ARM systems #1045

Merged
merged 8 commits into from
Jul 31, 2023
1 change: 1 addition & 0 deletions crypto/fipsmodule/bcm.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
#include "cipher/e_aesccm.c"

#include "cpucap/internal.h"
#include "cpucap/cpu_aarch64.c"
#include "cpucap/cpu_aarch64_apple.c"
#include "cpucap/cpu_aarch64_freebsd.c"
#include "cpucap/cpu_aarch64_fuchsia.c"
Expand Down
52 changes: 52 additions & 0 deletions crypto/fipsmodule/cpucap/cpu_aarch64.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP)

#include "cpu_aarch64.h"

// handle_cpu_env applies the override described by |in| (the text of the
// OPENSSL_armcap environment variable) to the capability word in |out[0]|.
//
// |in| is an unsigned 32-bit number, either decimal or hex with a leading
// "0x", optionally preceded by a single prefix character:
//   '~'   the number is inverted and ANDed into |out[0]| (clears bits)
//   '|'   the number is ORed into |out[0]| (sets bits)
//   none  the number replaces |out[0]|
//
// If no number can be parsed from |in|, |out[0]| is left unchanged.
//
// If |out[0]| is non-zero on entry and a request without the '~' prefix names
// a bit that is not set in it, a message is written to stderr and the process
// exits with status 1.
void handle_cpu_env(uint32_t *out, const char *in) {
  const int invert = in[0] == '~';
  // Not named |or|: that identifier is a macro once <iso646.h> is included.
  const int merge = in[0] == '|';
  const int skip_first_byte = invert || merge;
  // The second comparison is only evaluated when the first character is '0',
  // so this never reads past the terminating NUL.
  const int hex =
      in[skip_first_byte] == '0' && in[skip_first_byte + 1] == 'x';
  const uint32_t armcap = out[0];

  int sscanf_result;
  uint32_t v = 0;
  if (hex) {
    sscanf_result = sscanf(in + skip_first_byte + 2, "%" SCNx32, &v);
  } else {
    sscanf_result = sscanf(in + skip_first_byte, "%" SCNu32, &v);
  }

  // sscanf returns EOF (a non-zero value) when the input is exhausted before
  // the first conversion, e.g. for "", "~", "|" or "0x". Requiring exactly one
  // successful conversion keeps |v| from being used without a parsed value.
  if (sscanf_result != 1) {
    return;
  }

  // Detect if the user is trying to use the environment variable to set
  // a capability that is _not_ available on the CPU:
  // If the runtime capability check (e.g. via getauxval() on Linux)
  // returned a non-zero hwcap in `armcap` (out)
  // and a bit set in the requested `v` is not set in `armcap`,
  // abort instead of crashing later.
  // The case of invert cannot enable a nonexistent capability;
  // it can only disable an existing one.
  if (!invert && armcap && (~armcap & v)) {
    fprintf(stderr,
            "Fatal Error: HW capability found: 0x%02" PRIX32
            ", but HW capability requested: 0x%02" PRIX32 ".\n",
            armcap, v);
    exit(1);
  }

  if (invert) {
    out[0] &= ~v;
  } else if (merge) {
    out[0] |= v;
  } else {
    out[0] = v;
  }
}

#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP
31 changes: 31 additions & 0 deletions crypto/fipsmodule/cpucap/cpu_aarch64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

#ifndef OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H
#define OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H

#if defined(__cplusplus)
extern "C" {
#endif

#include <inttypes.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP)

// cpu_aarch64 contains common functions used across multiple cpu_aarch64_* files

// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|.
// See the comment in |OPENSSL_cpuid_setup| about this.
//
// |in| is a NUL-terminated string holding an unsigned 32-bit number, decimal
// or hex with a leading "0x", optionally preceded by one prefix character:
//   '~'   |out[0]| is ANDed with the inverse of the number
//   '|'   |out[0]| is ORed with the number
//   none  |out[0]| is replaced by the number
//
// When |in| contains text that does not parse as a number, |out[0]| is left
// unchanged.
//
// When |out[0]| is non-zero on entry and a request without the '~' prefix
// names a bit that is not set in |out[0]|, the function prints an error to
// stderr and terminates the process via exit(1) instead of returning.
void handle_cpu_env(uint32_t *out, const char *in);

#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP

#if defined(__cplusplus)
}
#endif

#endif // OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H
16 changes: 16 additions & 0 deletions crypto/fipsmodule/cpucap/cpu_aarch64_apple.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <openssl/arm_arch.h>

#include "cpu_aarch64.h"

extern uint32_t OPENSSL_armcap_P;
extern uint8_t OPENSSL_cpucap_initialized;
Expand Down Expand Up @@ -98,6 +99,21 @@ void OPENSSL_cpuid_setup(void) {
OPENSSL_armcap_P |= ARMV8_APPLE_M1;
}

// OPENSSL_armcap is a 32-bit, unsigned value which may start with "0x" to
// indicate a hex value. Prior to the 32-bit value, a '~' or '|' may be given.
//
// If the '~' prefix is present:
// the value is inverted and ANDed with the probed CPUID result
// If the '|' prefix is present:
// the value is ORed with the probed CPUID result
// Otherwise:
// the value is taken as the result of the CPUID
const char *env;
env = getenv("OPENSSL_armcap");
if (env != NULL) {
handle_cpu_env(&OPENSSL_armcap_P, env);
}

OPENSSL_cpucap_initialized = 1;
}

Expand Down
51 changes: 1 addition & 50 deletions crypto/fipsmodule/cpucap/cpu_aarch64_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,10 @@
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <openssl/arm_arch.h>

#include "cpu_aarch64.h"

extern uint32_t OPENSSL_armcap_P;
extern uint8_t OPENSSL_cpucap_initialized;
Expand All @@ -40,51 +36,6 @@ static uint64_t armv8_cpuid_probe(void) {
return val;
}

// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|.
// See the comment in |OPENSSL_cpuid_setup| about this.
//
// |in| may start with '~' (clear the given bits) or '|' (set the given bits);
// without a prefix the parsed number replaces |out[0]|. The number is decimal
// unless it starts with "0x". If nothing can be parsed, |out[0]| is left
// unchanged. Requesting a bit that is absent from a non-zero |out[0]| (other
// than through '~') prints an error to stderr and calls exit(1).
static void handle_cpu_env(uint32_t *out, const char *in) {
  const int invert = in[0] == '~';
  // Avoid |or| as an identifier; <iso646.h> defines it as a macro.
  const int merge = in[0] == '|';
  const int skip_first_byte = invert || merge;
  const int hex =
      in[skip_first_byte] == '0' && in[skip_first_byte + 1] == 'x';
  const uint32_t armcap = out[0];

  uint32_t v = 0;
  const int sscanf_result =
      hex ? sscanf(in + skip_first_byte + 2, "%" SCNx32, &v)
          : sscanf(in + skip_first_byte, "%" SCNu32, &v);

  // Anything other than one successful conversion means there is no value to
  // apply. This includes EOF, which sscanf returns for an empty remainder and
  // which a plain `!sscanf_result` test would let through.
  if (sscanf_result != 1) {
    return;
  }

  // Detect if the user is trying to use the environment variable to set
  // a capability that is _not_ available on the CPU:
  // If getauxval() returned a non-zero hwcap in `armcap` (out)
  // and a bit set in the requested `v` is not set in `armcap`,
  // abort instead of crashing later.
  // The case of invert cannot enable a nonexistent capability;
  // it can only disable an existing one.
  if (!invert && armcap && (~armcap & v)) {
    fprintf(stderr,
            "Fatal Error: HW capability found: 0x%02" PRIX32
            ", but HW capability requested: 0x%02" PRIX32 ".\n",
            armcap, v);
    exit(1);
  }

  if (invert) {
    out[0] &= ~v;
  } else if (merge) {
    out[0] |= v;
  } else {
    out[0] = v;
  }
}

void OPENSSL_cpuid_setup(void) {
unsigned long hwcap = getauxval(AT_HWCAP);

Expand Down
26 changes: 0 additions & 26 deletions crypto/fipsmodule/cpucap/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,52 +167,26 @@ extern uint32_t OPENSSL_armcap_P;
// |CRYPTO_is_ARMv8_AES_capable| and |CRYPTO_is_ARMv8_PMULL_capable|
// for checking the support for AES and PMULL instructions, respectively.
OPENSSL_INLINE int CRYPTO_is_NEON_capable(void) {
#if defined(OPENSSL_STATIC_ARMCAP_NEON) || defined(__ARM_NEON)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why were the second parts of the checks removed? Aren't these possible compiler flags that can set the capability statically?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to ACLE, __ARM_NEON is always set to 1 for AARCH64 platforms. As such, if we want OPENSSL_armcap to determine use of the C implementation, we can't check for this flag since it will always be set. We could use the -march flag when compiling to disable NEON, but since OPENSSL_armcap is supposed to determine processor capability at runtime I decided on this approach to reduce confusion.

cpu_aarch64_apple.c (as well as the counterpart files for other CPUs) still set OPENSSL_armcap to be the correct value for the processor in OPENSSL_cpuid_setup(), so removing the check doesn't disable NEON entirely, either.

This applies to __ARM_FEATURE_AES as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't these values be used on ARM processors (32-bit)? We have users who build AWS-LC on them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cpucap.c detects if LC is built on 32-bit ARM and sets OPENSSL_armcap accordingly, so when we call a function like CRYPTO_is_NEON_capable(), even without a check for these values, it will correctly detect the CPU capabilities.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be missing something, but I think if OPENSSL_STATIC_ARMCAP is defined but not OPENSSL_STATIC_ARMCAP_NEON due to an old compiler that defines __ARM_NEON, it will not reach the check for OPENSSL_armcap and will return 0.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the functions to no longer include the ifdef statements, similar to how the capability functions for x86 processors in the same file do it. Since OPENSSL_armcap_P is set correctly even when OPENSSL_STATIC_ARMCAP is defined, the functions should still return the right result.

Either way, a BORINGSSL_DISPATCH_TEST to ensure that this doesn't break anything will be introduced in a separate PR (which we should merge before this one)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'd also need to measure the performance in FIPS build with and without the ifdefs for static build (possibly for dynamic as well) when the STATIC_ARMCAPs are defined. Fetching the OPENSSL_armcap_P variable had some implications on the static FIPS build in x86 #856.

Copy link
Contributor Author

@billbo-yang billbo-yang Jul 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performed a benchmark on both the dynamic and static builds comparing performance of AES on main to the PR branch:

  • FIPS mode set
  • OPENSSL_STATIC_ARMCAP and other static macros set
  • Graviton2 on AL2

No performance differences between main & PR for either the dynamic or the static builds.

Full results can be seen here:
aws-lc-dynamic-staticarmcap-aes_main_vs_pr1045_bm.csv
aws-lc-static-staticarmcap-aes_main_vs_pr1045_bm.csv

return 1;
#elif defined(OPENSSL_STATIC_ARMCAP)
return 0;
#else
return (OPENSSL_armcap_P & ARMV7_NEON) != 0;
#endif
}

// CRYPTO_is_ARMv8_AES_capable returns one if AES support is asserted at build
// time (|OPENSSL_STATIC_ARMCAP_AES| or |__ARM_FEATURE_AES|). Otherwise it
// returns zero when |OPENSSL_STATIC_ARMCAP| is defined, and else reports
// whether the |ARMV8_AES| bit is set in |OPENSSL_armcap_P|.
OPENSSL_INLINE int CRYPTO_is_ARMv8_AES_capable(void) {
#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_AES)
  // Decided at compile time: available.
  const int has_aes = 1;
#elif defined(OPENSSL_STATIC_ARMCAP)
  // Static capabilities are in force and AES is not among them.
  const int has_aes = 0;
#else
  // Fall back to the capability word.
  const int has_aes = (OPENSSL_armcap_P & ARMV8_AES) ? 1 : 0;
#endif
  return has_aes;
}

// CRYPTO_is_ARMv8_PMULL_capable returns one if PMULL support is asserted at
// build time (|OPENSSL_STATIC_ARMCAP_PMULL| or |__ARM_FEATURE_AES|; note that
// the compiler macro consulted here is the AES one). Otherwise it returns zero
// when |OPENSSL_STATIC_ARMCAP| is defined, and else reports whether the
// |ARMV8_PMULL| bit is set in |OPENSSL_armcap_P|.
OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) {
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_AES)
  // Decided at compile time: available.
  const int has_pmull = 1;
#elif defined(OPENSSL_STATIC_ARMCAP)
  // Static capabilities are in force and PMULL is not among them.
  const int has_pmull = 0;
#else
  // Fall back to the capability word.
  const int has_pmull = (OPENSSL_armcap_P & ARMV8_PMULL) ? 1 : 0;
#endif
  return has_pmull;
}

OPENSSL_INLINE int CRYPTO_is_ARMv8_GCM_8x_capable(void) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about this and the next function? Should we remove the #if defined(OPENSSL_STATIC_ARMCAP) branch in those too?

#if defined(OPENSSL_STATIC_ARMCAP)
return 0;
#else
return ((OPENSSL_armcap_P & ARMV8_SHA3) != 0 &&
((OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 ||
(OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0));
#endif
}

// CRYPTO_is_ARMv8_wide_multiplier_capable returns zero whenever
// |OPENSSL_STATIC_ARMCAP| is defined. Otherwise it returns one if either the
// |ARMV8_NEOVERSE_V1| or the |ARMV8_APPLE_M1| bit is set in
// |OPENSSL_armcap_P|, and zero if neither is.
OPENSSL_INLINE int CRYPTO_is_ARMv8_wide_multiplier_capable(void) {
#if !defined(OPENSSL_STATIC_ARMCAP)
  if ((OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0) {
    return 1;
  }
  return (OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0;
#else
  return 0;
#endif
}

#endif // OPENSSL_ARM || OPENSSL_AARCH64
Expand Down