From 31ee4544dbe47903ce771270d6e3bea8654e9e50 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Mon, 26 Jun 2023 13:54:47 +0000 Subject: [PATCH] address review comments and fix CI - implement CAS 16 - remove useless commented out symbol name - support `feature("no-asm")` - fix warnings when `feature("c")` is enabled - rustfmt --- build.rs | 10 ++-- src/aarch64.rs | 103 +++++++++++++++++++++++++++++++---------- src/lib.rs | 6 ++- testcrate/tests/lse.rs | 7 ++- 4 files changed, 97 insertions(+), 29 deletions(-) diff --git a/build.rs b/build.rs index 266cc28b..4549d0b4 100644 --- a/build.rs +++ b/build.rs @@ -122,6 +122,9 @@ fn generate_aarch64_outlined_atomics() { macros.insert(sym, gen_macro(sym)); } + // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro. + let mut cas16 = gen_macro("cas16"); + for ordering in [ Ordering::Relaxed, Ordering::Acquire, @@ -129,17 +132,18 @@ fn generate_aarch64_outlined_atomics() { Ordering::AcqRel, ] { let sym_ordering = aarch64_symbol(ordering); - // TODO: support CAS 16 - for size in [1, 2, 4, 8 /* , 16*/] { + for size in [1, 2, 4, 8] { for (sym, macro_) in &mut macros { let name = format!("__aarch64_{sym}{size}_{sym_ordering}"); writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap(); } } + let name = format!("__aarch64_cas16_{sym_ordering}"); + writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap(); } let mut buf = String::new(); - for macro_def in macros.values() { + for macro_def in macros.values().chain(std::iter::once(&cas16)) { buf += macro_def; buf += "}; }"; } diff --git a/src/aarch64.rs b/src/aarch64.rs index 01888065..1aaa1a69 100644 --- a/src/aarch64.rs +++ b/src/aarch64.rs @@ -12,31 +12,21 @@ //! Ported from `aarch64/lse.S` in LLVM's compiler-rt. //! //! Generate functions for each of the following symbols: +//! __aarch64_casM_ORDER //! __aarch64_swpN_ORDER //! __aarch64_ldaddN_ORDER //! __aarch64_ldclrN_ORDER //! __aarch64_ldeorN_ORDER //! 
__aarch64_ldsetN_ORDER -//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel } -//! -//! TODO: M = 16 +//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel } //! //! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants. //! We do something similar, but with macro arguments. -/// We don't do runtime dispatch so we don't have to worry about the global ctor. -/// Apparently MacOS uses a different number of underscores in the symbol name (???) -// #[cfg(target_vendor = "apple")] -// macro_rules! have_lse { -// () => { ___aarch64_have_lse_atomics } -// } - -// #[cfg(not(target_vendor = "apple"))] -// macro_rules! have_lse { -// () => { __aarch64_have_lse_atomics } -// } +// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. /// Translate a byte size to a Rust type. +#[rustfmt::skip] macro_rules! int_ty { (1) => { i8 }; (2) => { i16 }; @@ -48,6 +38,7 @@ macro_rules! int_ty { /// Given a byte size and a register number, return a register of the appropriate size. /// /// See . +#[rustfmt::skip] macro_rules! reg { (1, $num:literal) => { concat!("w", $num) }; (2, $num:literal) => { concat!("w", $num) }; @@ -56,6 +47,7 @@ macro_rules! reg { } /// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction. +#[rustfmt::skip] macro_rules! acquire { (Relaxed) => { "" }; (Acquire) => { "a" }; @@ -64,6 +56,7 @@ macro_rules! acquire { } /// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction. +#[rustfmt::skip] macro_rules! release { (Relaxed) => { "" }; (Acquire) => { "" }; @@ -72,6 +65,7 @@ macro_rules! release { } /// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction. +#[rustfmt::skip] macro_rules! size { (1) => { "b" }; (2) => { "h" }; @@ -84,6 +78,7 @@ macro_rules! 
size { /// with the correct semantics. /// /// See +#[rustfmt::skip] macro_rules! uxt { (1) => { "uxtb" }; (2) => { "uxth" }; @@ -95,7 +90,9 @@ macro_rules! uxt { /// /// See . macro_rules! ldxr { - ($ordering:ident, $bytes:tt) => { concat!("ld", acquire!($ordering), "xr", size!($bytes)) } + ($ordering:ident, $bytes:tt) => { + concat!("ld", acquire!($ordering), "xr", size!($bytes)) + }; } /// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction @@ -103,7 +100,29 @@ macro_rules! ldxr { /// /// See . macro_rules! stxr { - ($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) } + ($ordering:ident, $bytes:tt) => { + concat!("st", release!($ordering), "xr", size!($bytes)) + }; +} + +/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction +/// with the correct semantics. +/// +/// See +macro_rules! ldxp { + ($ordering:ident) => { + concat!("ld", acquire!($ordering), "xp") + }; +} + +/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction +/// with the correct semantics. +/// +/// See . +macro_rules! stxp { + ($ordering:ident) => { + concat!("st", release!($ordering), "xp") + }; } /// See . @@ -134,9 +153,38 @@ macro_rules! compare_and_swap { } } } } - } + }; } +// i128 uses a completely different impl, so it has its own macro. +macro_rules! compare_and_swap_i128 { + ($ordering:ident, $name:ident) => { + intrinsics! { + #[maybe_use_optimized_c_shim] + #[naked] + pub extern "C" fn $name ( + expected: i128, desired: i128, ptr: *mut i128 + ) -> i128 { + unsafe { core::arch::asm! 
{ + "mov x16, x0", + "mov x17, x1", + "0:", + // LDXP x0, x1, [x4] + concat!(ldxp!($ordering), " x0, x1, [x4]"), + "cmp x0, x16", + "ccmp x1, x17, #0, eq", + "bne 1f", + // STXP w(tmp2), x2, x3, [x4] + concat!(stxp!($ordering), " w15, x2, x3, [x4]"), + "cbnz w15, 0b", + "1:", + "ret", + options(noreturn) + } } + } + } + }; +} /// See . macro_rules! swap { @@ -161,7 +209,7 @@ macro_rules! swap { } } } } - } + }; } /// See (e.g.) . @@ -194,28 +242,35 @@ macro_rules! fetch_op { // We need a single macro to pass to `foreach_ldadd`. macro_rules! add { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "add" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "add" } + }; } macro_rules! and { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "bic" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "bic" } + }; } macro_rules! xor { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "eor" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "eor" } + }; } macro_rules! or { - ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "orr" } } + ($ordering:ident, $bytes:tt, $name:ident) => { + fetch_op! { $ordering, $bytes, $name, "orr" } + }; } // See `generate_aarch64_outlined_atomics` in build.rs. 
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs")); foreach_cas!(compare_and_swap); +foreach_cas16!(compare_and_swap_i128); foreach_swp!(swap); foreach_ldadd!(add); foreach_ldclr!(and); foreach_ldeor!(xor); foreach_ldset!(or); - -// TODO: CAS 16 diff --git a/src/lib.rs b/src/lib.rs index 90b21f1f..4b44adc2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,7 +57,11 @@ pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; -#[cfg(target_arch = "aarch64")] +#[cfg(all( + target_arch = "aarch64", + not(feature = "no-asm"), + not(feature = "optimized-c") +))] pub mod aarch64; #[cfg(all( diff --git a/testcrate/tests/lse.rs b/testcrate/tests/lse.rs index 49d73177..7b54ab5d 100644 --- a/testcrate/tests/lse.rs +++ b/testcrate/tests/lse.rs @@ -1,5 +1,5 @@ -#![cfg(target_arch = "aarch64")] #![feature(decl_macro)] // so we can use pub(super) +#![cfg(all(target_arch = "aarch64", not(feature = "no-asm")))] /// Translate a byte size to a Rust type. macro int_ty { @@ -38,6 +38,10 @@ mod cas { } } +macro test_cas16($_ordering:ident, $name:ident) { + cas::test!($_ordering, 16, $name); +} + mod swap { pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { #[test] @@ -81,6 +85,7 @@ test_op!(xor, std::ops::BitXor::bitxor); test_op!(or, std::ops::BitOr::bitor); compiler_builtins::foreach_cas!(cas::test); +compiler_builtins::foreach_cas16!(test_cas16); compiler_builtins::foreach_swp!(swap::test); compiler_builtins::foreach_ldadd!(add::test); compiler_builtins::foreach_ldclr!(clr::test);