Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize memset and memclr for ARM #164

Merged
merged 2 commits into from
Jul 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 70 additions & 26 deletions src/arm.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use core::intrinsics;
use core::{intrinsics, ptr};

#[cfg(feature = "mem")]
use mem::{memcpy, memmove, memset};
use mem;

// NOTE This function and the ones below are implemented using assembly because they use a custom
// calling convention which can't be implemented using a normal Rust function
Expand Down Expand Up @@ -60,65 +59,110 @@ pub unsafe fn __aeabi_ldivmod() {
intrinsics::unreachable();
}

// TODO: These aeabi_* functions should be defined as aliases
#[cfg(not(feature = "mem"))]
extern "C" {
fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
}

// FIXME: The `*4` and `*8` variants should be defined as aliases.

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
mem::memcpy(dest, src, n);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
let mut dest = dest as *mut u32;
let mut src = src as *mut u32;

while n >= 4 {
ptr::write(dest, ptr::read(src));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm, the 4 in the name is "aligned to 4 bytes", right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. That's why we can optimize the routine by doing these aligned 4-byte reads / writes here.

dest = dest.offset(1);
src = src.offset(1);
n -= 4;
}

__aeabi_memcpy(dest as *mut u8, src as *const u8, n);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
__aeabi_memcpy4(dest, src, n);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
mem::memmove(dest, src, n);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
__aeabi_memmove(dest, src, n);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
__aeabi_memmove(dest, src, n);
}

// Note the different argument order
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
// Note the different argument order
mem::memset(dest, c, n);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
let mut dest = dest as *mut u32;

let byte = (c as u32) & 0xff;
let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;

while n >= 4 {
ptr::write(dest, c);
dest = dest.offset(1);
n -= 4;
}

__aeabi_memset(dest as *mut u8, n, byte as i32);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
__aeabi_memset4(dest, n, c);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset(dest, n, 0);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset4(dest, n, 0);
}

#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset4(dest, n, 0);
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![feature(i128_type)]
#![feature(repr_simd)]
#![feature(abi_unadjusted)]
#![feature(linkage)]
#![allow(unused_features)]
#![no_builtins]
#![unstable(feature = "compiler_builtins_lib",
Expand Down Expand Up @@ -45,7 +46,6 @@ mod macros;
pub mod int;
pub mod float;

#[cfg(feature = "mem")]
pub mod mem;

#[cfg(target_arch = "arm")]
Expand Down
8 changes: 4 additions & 4 deletions src/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ type c_int = i16;
#[cfg(not(target_pointer_width = "16"))]
type c_int = i32;

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcpy(dest: *mut u8,
src: *const u8,
n: usize)
Expand All @@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
dest
}

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memmove(dest: *mut u8,
src: *const u8,
n: usize)
Expand All @@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
dest
}

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
let mut i = 0;
while i < n {
Expand All @@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
s
}

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
let mut i = 0;
while i < n {
Expand Down
58 changes: 58 additions & 0 deletions tests/aeabi_memclr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]

extern crate compiler_builtins;

// test runner
extern crate utest_cortex_m_qemu;

// overrides `panic!`
#[macro_use]
extern crate utest_macros;

use core::mem;

// Shadows `panic!` for the rest of this file: every invocation is forwarded, tokens unchanged,
// to `upanic!` (presumably supplied by `utest_macros` -- confirm).
macro_rules! panic {
($($tt:tt)*) => {
upanic!($($tt)*);
};
}

// Hand-written declarations of the routines under test (presumably provided by
// `compiler_builtins` -- confirm). Both take the destination pointer first and the byte count
// second.
// NOTE(review): `c` is declared as `u32` here; confirm it agrees with the parameter type used
// by the definition.
extern "C" {
fn __aeabi_memclr4(dest: *mut u8, n: usize);
fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}

/// Eight-byte scratch buffer whose storage is aligned like a `u32`.
struct Aligned {
    /// Bytes handed to the routines under test.
    array: [u8; 8],
    /// Zero-length field: contributes no bytes but raises the struct's alignment to `u32`'s.
    _alignment: [u32; 0],
}

impl Aligned {
    /// Creates a buffer with every byte set to zero.
    fn new() -> Self {
        let array = [0u8; 8];
        Self { array, _alignment: [] }
    }
}

/// Checks that `__aeabi_memclr4` zeroes the first `n` bytes of a 4-byte-aligned buffer for
/// every `n` from 0 through 8.
#[test]
fn memclr4() {
    let mut aligned = Aligned::new();
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;

    for n in 0..9 {
        // Dirty the prefix and confirm it really changed: the buffer starts out zeroed, so
        // without this check a `memclr4` that did nothing would still pass.
        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, 0xff) }
        assert!(xs[0..n].iter().all(|x| *x == 0xff));

        unsafe { __aeabi_memclr4(xs.as_mut_ptr(), n) }
        assert!(xs[0..n].iter().all(|x| *x == 0));
    }
}
69 changes: 69 additions & 0 deletions tests/aeabi_memcpy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]

extern crate compiler_builtins;

// test runner
extern crate utest_cortex_m_qemu;

// overrides `panic!`
#[macro_use]
extern crate utest_macros;

// Shadows `panic!` for the rest of this file: every invocation is forwarded, tokens unchanged,
// to `upanic!` (presumably supplied by `utest_macros` -- confirm).
macro_rules! panic {
($($tt:tt)*) => {
upanic!($($tt)*);
};
}

// Hand-written declarations of the routines under test (presumably provided by
// `compiler_builtins` -- confirm). Argument order is destination, source, byte count.
// NOTE(review): the `4` suffix presumably requires both pointers to be 4-byte aligned --
// confirm against the ARM run-time ABI.
extern "C" {
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
}

/// Eight-byte buffer whose storage is aligned like a `u32`.
struct Aligned {
    /// Bytes handed to the routines under test.
    array: [u8; 8],
    /// Zero-length field: contributes no bytes but raises the struct's alignment to `u32`'s.
    _alignment: [u32; 0],
}

impl Aligned {
    /// Wraps `array` so that its bytes live in `u32`-aligned storage.
    fn new(array: [u8; 8]) -> Self {
        Self { array, _alignment: [] }
    }
}

/// Checks that `__aeabi_memcpy` copies exactly the first `n` bytes of `src` into `dest` for
/// every `n` from 0 through the full buffer length.
#[test]
fn memcpy() {
    let mut dest = [0u8; 4];
    let src = [0xde, 0xad, 0xbe, 0xef];

    // `+ 1` so that the full-length copy is exercised too.
    for n in 0..dest.len() + 1 {
        // Start every round from a clean destination.
        dest.copy_from_slice(&[0; 4]);

        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n]);
        // Nothing beyond the requested length may be written.
        assert!(dest[n..].iter().all(|b| *b == 0));
    }
}

/// Checks that `__aeabi_memcpy4` copies exactly the first `n` bytes between two 4-byte-aligned
/// buffers for every `n` from 0 through the full buffer length.
#[test]
fn memcpy4() {
    let mut aligned_dest = Aligned::new([0; 8]);
    let dest = &mut aligned_dest.array;
    // The source must be word-aligned as well: the routine reads it through a `u32` pointer,
    // and a bare `[u8; 8]` local carries no such guarantee.
    let aligned_src = Aligned::new([0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]);
    let src = &aligned_src.array;

    // `+ 1` so that the full-length copy (two whole words) is exercised too.
    for n in 0..dest.len() + 1 {
        // Start every round from a clean destination.
        dest.copy_from_slice(&[0; 8]);

        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n]);
        // Nothing beyond the requested length may be written.
        assert!(dest[n..].iter().all(|b| *b == 0));
    }
}
Loading