diff --git a/src/probestack.rs b/src/probestack.rs index e59fe861..329e2726 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -44,7 +44,7 @@ #![cfg(not(windows))] // Windows already has builtins to do this #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg(target_arch = "x86_64")] pub unsafe extern fn __rust_probestack() { // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, @@ -53,28 +53,33 @@ pub unsafe extern fn __rust_probestack() { // The ABI here is that the stack frame size is located in `%eax`. Upon // return we're not supposed to modify `%esp` or `%eax`. asm!(" - lea 8(%rsp),%r11 // rsp before calling this routine -> r11 + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - // Main loop, taken in one page increments. We're decrementing r11 by + // Main loop, taken in one page increments. We're decrementing rsp by // a page each time until there's less than a page remaining. We're // guaranteed that this function isn't called unless there's more than a - // page needed + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack originally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. 2: + sub $$0x1000,%rsp + test %rsp,8(%rsp) sub $$0x1000,%r11 - test %r11,(%r11) - sub $$0x1000,%rax - cmp $$0x1000,%rax + cmp $$0x1000,%r11 ja 2b // Finish up the last remaining stack space requested, getting the last - // bits out of rax - sub %rax,%r11 - test %r11,(%r11) + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) - // We now know that %r11 is (%rsp + 8 - %rax) so to recover rax - // we calculate (%rsp + 8) - %r11 which will give us %rax - lea 8(%rsp),%rax - sub %r11,%rax + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. 
+ add %rax,%rsp ret "); @@ -82,7 +87,7 @@ pub unsafe extern fn __rust_probestack() { } #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg(target_arch = "x86")] pub unsafe extern fn __rust_probestack() { // This is the same as x86_64 above, only translated for 32-bit sizes. Note @@ -92,19 +97,18 @@ pub unsafe extern fn __rust_probestack() { // The ABI here is the same as x86_64, except everything is 32-bits large. asm!(" push %ecx - lea 8(%esp),%ecx + mov %eax,%ecx 2: + sub $$0x1000,%esp + test %esp,8(%esp) sub $$0x1000,%ecx - test %ecx,(%ecx) - sub $$0x1000,%eax - cmp $$0x1000,%eax + cmp $$0x1000,%ecx ja 2b - sub %eax,%ecx - test %ecx,(%ecx) + sub %ecx,%esp + test %esp,8(%esp) - lea 8(%esp),%eax - sub %ecx,%eax + add %eax,%esp pop %ecx ret "); diff --git a/src/x86_64.rs b/src/x86_64.rs index 072f964d..64cc06c9 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -10,7 +10,7 @@ use core::intrinsics; #[cfg(windows)] #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn ___chkstk_ms() { asm!("push %rcx push %rax @@ -34,7 +34,7 @@ pub unsafe fn ___chkstk_ms() { #[cfg(windows)] #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __alloca() { asm!("mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable"); @@ -43,7 +43,7 @@ pub unsafe fn __alloca() { #[cfg(windows)] #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn ___chkstk() { asm!("push %rcx cmp $$0x1000,%rax