Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aarch64 Miscompilation #1535

Closed
DrewRidley opened this issue Sep 20, 2024 · 17 comments · Fixed by #1536
Closed

Aarch64 Miscompilation #1535

DrewRidley opened this issue Sep 20, 2024 · 17 comments · Fixed by #1536
Labels
C-bug Category: This is a bug.

Comments

@DrewRidley
Copy link

DrewRidley commented Sep 20, 2024

Hey all, hope all is well.
I am trying to build my bevy application on my M3 Pro macbook. After fighting with feature flags for a few dependencies, I was able to remove all of the problematic neon intrinsics.

When running my application. I get a segfault within my netcode. When investigating with lldb, I discovered the following:

(lldb) disassemble --frame
spacebandits`_$LT$std..sys_common..net..LookupHost$u20$as$u20$core..convert..TryFrom$LT$$LP$$RF$str$C$u16$RP$$GT$$GT$::try_from::_$u7b$$u7b$closure$u7d$$u7d$::h657e8aab7da6da0b:
    0x109db6a54 <+0>:   mov    x29, sp
    0x109db6a58 <+4>:   sub    sp, sp, #0x30
    0x109db6a5c <+8>:   tbnz   w0, #0x1, 0x109db6b10     ; <+188> at net.rs:205:41
    0x109db6a60 <+12>:  cmp    x0, #0x1
    0x109db6a64 <+16>:  b.eq   0x109db6b10               ; <+188> at net.rs:205:41
    0x109db6a68 <+20>:  add    x8, x0, #0x8
    0x109db6a6c <+24>:  cmp    x8, x0
    0x109db6a70 <+28>:  b.lo   0x109db6ac0               ; <+108> at maybe_uninit.rs:399:9
    0x109db6a74 <+32>:  add    x11, x0, #0x8
    0x109db6a78 <+36>:  add    x10, sp, #0x28
    0x109db6a7c <+40>:  str    x11, [x10]
    0x109db6a80 <+44>:  mov    x11, #0x1
    0x109db6a84 <+48>:  add    x12, sp, #0x20
    0x109db6a88 <+52>:  str    x11, [x12]
    0x109db6a8c <+56>:  ldr    x13, [x10]
    0x109db6a90 <+60>:  add    x12, sp, #0x18
    0x109db6a94 <+64>:  str    x13, [x12]
    0x109db6a98 <+68>:  mov    x13, #0x0
    0x109db6a9c <+72>:  add    x14, sp, #0x10
    0x109db6aa0 <+76>:  str    x13, [x14]
    0x109db6aa4 <+80>:  ldr    x14, [x12]
    0x109db6aa8 <+84>:  orr    x14, x14, #0x1
    0x109db6aac <+88>:  add    x15, sp, #0x8
    0x109db6ab0 <+92>:  str    x14, [x15]
    0x109db6ab4 <+96>:  mov    x15, sp
    0x109db6ab8 <+100>: str    x11, [x15]
    0x109db6abc <+104>: b      0x109db6b30               ; <+220> at net.rs:210:25
    0x109db6ac0 <+108>: adrp   x1, 7189
    0x109db6ac4 <+112>: add    x1, x1, #0x2b0            ; .Ldata14e
    0x109db6ac8 <+116>: ldr    x2, [x1]
    0x109db6acc <+120>: ldr    x1, [x1, #0x8]
    0x109db6ad0 <+124>: add    x3, sp, #0x20
    0x109db6ad4 <+128>: str    x2, [x3]
    0x109db6ad8 <+132>: add    x2, sp, #0x28
    0x109db6adc <+136>: str    x1, [x2]
    0x109db6ae0 <+140>: adrp   x3, 7189
    0x109db6ae4 <+144>: add    x3, x3, #0x2a0            ; .Ldata14d
    0x109db6ae8 <+148>: ldr    x4, [x3]
    0x109db6aec <+152>: ldr    x1, [x3, #0x8]
    0x109db6af0 <+156>: mov    x3, sp
    0x109db6af4 <+160>: str    x4, [x3]
    0x109db6af8 <+164>: add    x4, sp, #0x8
    0x109db6afc <+168>: str    x1, [x4]
    0x109db6b00 <+172>: ldr    x0, [x3]
    0x109db6b04 <+176>: add    sp, sp, #0x30
    0x109db6b08 <+180>: ldp    x29, x30, [sp], #0x10
    0x109db6b0c <+184>: ret
    0x109db6b10 <+188>: adrp   x7, 7189
    0x109db6b14 <+192>: add    x7, x7, #0x2a0            ; .Ldata14d
    0x109db6b18 <+196>: ldr    x8, [x7]
    0x109db6b1c <+200>: ldr    x7, [x7, #0x8]
    0x109db6b20 <+204>: mov    x9, sp
    0x109db6b24 <+208>: str    x8, [x9]
    0x109db6b28 <+212>: add    x8, sp, #0x8
    0x109db6b2c <+216>: str    x7, [x8]
    0x109db6b30 <+220>: mov    x10, sp
    0x109db6b34 <+224>: ldr    x0, [x10]
    0x109db6b38 <+228>: add    x10, sp, #0x8
    0x109db6b3c <+232>: ldr    x1, [x10]
    0x109db6b40 <+236>: add    sp, sp, #0x30
    0x109db6b44 <+240>: ldp    x29, x30, [sp], #0x10
    0x109db6b48 <+244>: ret

spacebandits`_$LT$std..sys_common..net..LookupHost$u20$as$u20$core..convert..TryFrom$LT$$LP$$RF$str$C$u16$RP$$GT$$GT$::try_from::_$u7b$$u7b$closure$u7d$$u7d$::h657e8aab7da6da0b:
    0x109db6b4c <+248>: stp    x29, x30, [sp, #-0x10]!
    0x109db6b50 <+252>: mov    x29, sp
    0x109db6b54 <+256>: sub    sp, sp, #0x60
    0x109db6b58 <+260>: add    x2, sp, #0x10
    0x109db6b5c <+264>: str    x0, [x2]
    0x109db6b60 <+268>: mov    x4, #0x1
    0x109db6b64 <+272>: mov    x5, #0x0
    0x109db6b68 <+276>: cmp    x0, xzr
    0x109db6b6c <+280>: csel   x5, x4, x5, ne
    0x109db6b70 <+284>: mov    x6, #0xffffffff
    0x109db6b74 <+288>: cmp    x5, x6
    0x109db6b78 <+292>: b.hi   0x109db6bf4               ; <+416> at net.rs:211:30
    0x109db6b7c <+296>: cmp    x0, #0x0
    0x109db6b80 <+300>: cset   x9, ne
    0x109db6b84 <+304>: uxtb   w9, w9
    0x109db6b88 <+308>: cmp    w9, #0x2
    0x109db6b8c <+312>: b.hs   0x109db6bf4               ; <+416> at net.rs:211:30
    0x109db6b90 <+316>: csel   x11, xzr, x9, hs
    0x109db6b94 <+320>: csdb
    0x109db6b98 <+324>: adr    x10, #0x10                ; <+340> at result.rs:772:17
    0x109db6b9c <+328>: ldrsw  x11, [x10, w11, uxtw #2]
    0x109db6ba0 <+332>: add    x10, x10, x11
    0x109db6ba4 <+336>: br     x10
    0x109db6ba8 <+340>: udf    #0x24
    0x109db6bac <+344>: udf    #0xc
    0x109db6bb0 <+348>: b      0x109db6bf4               ; <+416> at net.rs:211:30
    0x109db6bb4 <+352>: add    x11, sp, #0x30
    0x109db6bb8 <+356>: str    x0, [x11]
    0x109db6bbc <+360>: add    x12, sp, #0x40
    0x109db6bc0 <+364>: str    x0, [x12]
    0x109db6bc4 <+368>: bl     0x109dac224               ; core::sync::atomic::atomic_compare_exchange_weak::h07243b64f3fd0e79 + 184 at atomic.rs:3390:34
    0x109db6bc8 <+372>: b      0x109db6bd0               ; <+380> at result.rs:774:5
    0x109db6bcc <+376>: mov    x0, #0x0
    0x109db6bd0 <+380>: add    x1, sp, #0x50
    0x109db6bd4 <+384>: str    x0, [x1]
    0x109db6bd8 <+388>: add    x1, sp, #0x20
    0x109db6bdc <+392>: str    x0, [x1]
    0x109db6be0 <+396>: mov    x2, sp
    0x109db6be4 <+400>: str    x0, [x2]
    0x109db6be8 <+404>: add    sp, sp, #0x60
    0x109db6bec <+408>: ldp    x29, x30, [sp], #0x10
    0x109db6bf0 <+412>: ret
    0x109db6bf4 <+416>: udf    #0xc11f

spacebandits`_$LT$std..sys_common..net..LookupHost$u20$as$u20$core..convert..TryFrom$LT$$LP$$RF$str$C$u16$RP$$GT$$GT$::try_from::_$u7b$$u7b$closure$u7d$$u7d$::h657e8aab7da6da0b:
    0x109db6bf8 <+420>: stp    x29, x30, [sp, #-0x10]!
    0x109db6bfc <+424>: mov    x29, sp
    0x109db6c00 <+428>: sub    sp, sp, #0x50
    0x109db6c04 <+432>: add    x10, sp, #0x10
    0x109db6c08 <+436>: str    x0, [x10]
    0x109db6c0c <+440>: mov    w1, #0x0
    0x109db6c10 <+444>: add    x12, sp, #0x30
    0x109db6c14 <+448>: strb   w1, [x12]
->  0x109db6c18 <+452>: bl     0x109dad89c               ; _$LT$$RF$mut$u20$W$u20$as$u20$core..fmt..Write..write_fmt..SpecWriteFmt$GT$::spec_write_fmt::h6a3c139bdf9273a6 + 32
    0x109db6c1c <+456>: cbnz   x0, 0x109db6c40           ; std::sys_common::net::TcpStream::connect::h810e1671d445f7ea + 4 at net.rs:226:5
    0x109db6c20 <+460>: mov    x0, #0x0
    0x109db6c24 <+464>: add    x15, sp, #0x40
    0x109db6c28 <+468>: str    x0, [x15]
    0x109db6c2c <+472>: mov    x1, sp
    0x109db6c30 <+476>: str    x0, [x1]
    0x109db6c34 <+480>: add    sp, sp, #0x50
    0x109db6c38 <+484>: ldp    x29, x30, [sp], #0x10
(lldb)

The std..sys_common..net..LookupHost$u20$as$u20$core..convert..TryFrom implementation here returns x0 = 0x0000000000000010. This appears to be a niche optimization of the Ok variant. Once this function returns, the generated assembly tries to dereference that address, causing a segmentation fault.

(lldb) thread step-over
Process 60027 stopped
* thread #8, name = 'Compute Task Pool (0)', stop reason = EXC_BAD_ACCESS (code=1, address=0x10)
    frame #0: 0x0000000109dad90c spacebandits`_$LT$$RF$mut$u20$W$u20$as$u20$core..fmt..Write..write_fmt..SpecWriteFmt$GT$::spec_write_fmt::h6a3c139bdf9273a6 at mod.rs:444:32
Target 0: (spacebandits) stopped.
(lldb) register read
General Purpose Registers:
        x0 = 0x0000000000000010
        x1 = 0x0000000000000028
        x2 = 0x0000000109dad90c  spacebandits`_$LT$$RF$mut$u20$W$u20$as$u20$core..fmt..Write..write_fmt..SpecWriteFmt$GT$::spec_write_fmt::h6a3c139bdf9273a6 + 144 at mod.rs:444:32
        x3 = 0x0000000170c4c6b8
        x4 = 0x0000000109b0eaa8  spacebandits`_$LT$alloc..sync..Arc$LT$T$C$A$GT$$u20$as$u20$core..ops..drop..Drop$GT$::drop::h0a9e0cce25cfe565 + 8 at sync.rs:2477:5
        x5 = 0x0000000170c4c790
        x6 = 0x0000000170c4c798
        x7 = 0x0000000000000150
        x8 = 0x0000000000000001
        x9 = 0x00000000ffffffff
       x10 = 0x0000000170c4c510
       x11 = 0x0000000000000014
       x12 = 0x0000000000000000
       x13 = 0x00000000ffffffff
       x14 = 0x0000000109db76d8  spacebandits`std::sys_common::net::TcpStream::duplicate::h99461e7a148a41b2 + 212
       x15 = 0x0000000000000000
       x16 = 0x0000000000000131
       x17 = 0x00000001fde4a4a0
       x18 = 0x0000000000000000
       x19 = 0x000000014482bcc0
       x20 = 0x0000000170c4c600
       x21 = 0x000000016fdd9003
       x22 = 0x0000000170c4cbd0
       x23 = 0x0000000170c4c610
       x24 = 0x0000000170c4c620
       x25 = 0x0000000170c4c670
       x26 = 0x0000000170c4c680
       x27 = 0x0000000170c4cbb0
       x28 = 0x0000000170c4ca50
        fp = 0x0000000170c4c580
        lr = 0x0000000109db6c1c  spacebandits`_$LT$std..sys_common..net..LookupHost$u20$as$u20$core..convert..TryFrom$LT$$LP$$RF$str$C$u16$RP$$GT$$GT$::try_from::_$u7b$$u7b$closure$u7d$$u7d$::h657e8aab7da6da0b + 456 at result.rs:774:6
        sp = 0x0000000170c4c510
        pc = 0x0000000109dad90c  spacebandits`_$LT$$RF$mut$u20$W$u20$as$u20$core..fmt..Write..write_fmt..SpecWriteFmt$GT$::spec_write_fmt::h6a3c139bdf9273a6 + 144 at mod.rs:444:32
      cpsr = 0x80201400

(lldb) register read x0
      x0 = 0x0000000000000010
(lldb)

If it helps, I exclusively used cranelift for this build, so ABI should not be of concern here. It seems like its more likely that the generated CLIF is incorrectly representing this trait method, and producing unsound code.

If there are any suggestions on how to approach this problem, I can continue this investigation and submit a PR if cranelift is determined to be the cause.

Thanks again for all your help!

@bjorn3
Copy link
Member

bjorn3 commented Sep 20, 2024

Are you using the version at https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev (or one you compiled yourself) or are you using the rustc-codegen-cranelift-preview component? If the latter you are mixing LLVM and Cranelift compiled code as the standard library would be compiled using LLVM. I'm currently working on fixing a couple of ABI bugs in Cranelift that affect mixed LLVM/Cranelift binaries.

@bjorn3 bjorn3 added C-bug Category: This is a bug. A-abi Area: ABI handling labels Sep 20, 2024
@DrewRidley
Copy link
Author

DrewRidley commented Sep 20, 2024

I am uisng the cg_clif-aarch64-apple-darwin.tar.xz build published as of yesterday.

I did some more investigating and am currently trying to come up with a minimal reproduction. I will link to it when I do.

@DrewRidley
Copy link
Author

This issue is likely related to #1507. It turns out that same exact 0x10 value is polluting an atomic there. I was able to identify the backtrace for the atomic_load that causes the crashj:

::AtomicLink::get::h11118bcb74511f86 + 36
    frame #2: 0x0000000105a01b0c game`std::sys::sync::rwlock::queue::add_backlinks_and_find_tail::hb5ccb39373707ecf + 72
    frame #3: 0x0000000105a02280 game`std::sys::sync::rwlock::queue::RwLock::unlock_queue::h386971371b2dd975 + 80
    frame #4: 0x0000000105a0206c game`std::sys::sync::rwlock::queue::RwLock::lock_contended::h82ef38b2a58af3a6 + 1064
    frame #5: 0x0000000101ca6dbc game`_$LT$$BP$mut$u20$T$u20$as$u20$bevy_ptr..DebugEnsureAligned$GT$::debug_ensure_aligned::h0b8c1438e3c4763b at lib.rs:608:29
    frame #6: 0x0000000101eb53dc game`bevy_math::rects::rect::_::_$LT$impl$u20$bevy_reflect..reflect..Reflect$u20$for$u20$bevy_math..rects..rect..Rect$GT$::try_apply::hd1c6d5b4e855a5b1 at result.rs:1999:17
    frame #7: 0x00000001024927e0 game`_$LT$bevy_pbr..render..light..ShadowPassNode$u20$as$u20$bevy_render..render_graph..node..Node$GT$::run::_$u7b$$u7b$closure$u7d$$u7d$::hb222a0f761c1bf1f

My investigation isn't complete, but at the moment I suspect the value comes from the code generated by the bevy Reflect derive macro.

            #[inline]
            fn try_apply(
                &mut self,
                value: &dyn bevy_reflect::PartialReflect,
            ) -> ::core::result::Result<(), bevy_reflect::ApplyError> {
                if let bevy_reflect::ReflectRef::Struct(struct_value) = bevy_reflect::PartialReflect::reflect_ref(
                    value,
                ) {
                    for (i, value) in ::core::iter::Iterator::enumerate(
                        bevy_reflect::Struct::iter_fields(struct_value),
                    ) {
                        let name = bevy_reflect::Struct::name_at(struct_value, i)
                            .unwrap();
                        if let ::core::option::Option::Some(v) = bevy_reflect::Struct::field_mut(
                            self,
                            name,
                        ) {
                            bevy_reflect::PartialReflect::try_apply(v, value)?;
                        }
                    }
                } else {
                    return ::core::result::Result::Err(bevy_reflect::ApplyError::MismatchedKinds {
                        from_kind: bevy_reflect::PartialReflect::reflect_kind(value),
                        to_kind: bevy_reflect::ReflectKind::Struct,
                    });
                }
                ::core::result::Result::Ok(())
            }

If there are any suggestions or pointers on where to look next I would much appreciate it!

@DrewRidley
Copy link
Author

DrewRidley commented Sep 21, 2024

Hey all,
You can also reproduce with the following test:

fn main() {
    // Initialize the shared RwLock-protected Option<String> with Some("Hello")
    let shared_option: Arc<RwLock<Option<String>>> =
        Arc::new(RwLock::new(Some(String::from("Hello"))));

    // Vector to hold thread handles
    let mut handles = vec![];

    // Spawn multiple threads to read and write to the shared Option
    for i in 0..4 {
        let shared_clone = Arc::clone(&shared_option);

        let handle = thread::spawn(move || {
            if i % 2 == 0 {
                // Even-indexed threads perform read operations
                let opt = shared_clone.read().unwrap();
                if let Some(ref s) = *opt {
                    println!("Thread {}: Read value -> {}", i, s);
                } else {
                    println!("Thread {}: Read value -> None", i);
                }
            } else {
                // Odd-indexed threads perform write operations
                let mut opt = shared_clone.write().unwrap();
                *opt = None;
                println!("Thread {}: Wrote value -> None", i);
            }
        });

        handles.push(handle);
    }

    // Wait for all threads to finish
    for handle in handles {
        handle.join().unwrap();
    }

    // Final state check
    let final_opt = shared_option.read().unwrap();
    if let Some(ref s) = *final_opt {
        println!("Final state: {}", s);
    } else {
        println!("Final state: None");
    }
}

It seems the code generated by cranelift does not uphold the guarantees made by RwLock. There appears to be a data-race.

@bjorn3
Copy link
Member

bjorn3 commented Sep 21, 2024

Thanks for looking into this! I will take a look once I'm done with the aformentioned abi fixes.

@bjorn3 bjorn3 added O-arm Target: ARM processors (arm, thumb and AArch64 targets) and removed A-abi Area: ABI handling labels Sep 21, 2024
@Noratrieb
Copy link
Member

I have tried to reproduce this issue on aarch64-linux (Graviton dev desktop) and I couldn't initially, until it was pointed out to me that of course the rwlock, which is suspicous here, is completely different on MacOS and Linux.
So after applying a patch to use queue rwlock on Linux too, I was able to reproduce it fairly reliably on aarch64 Linux.
And... with that, I can also reproduce it on x86-64 Linux!

@Noratrieb
Copy link
Member

@bjorn3 bjorn3 removed the O-arm Target: ARM processors (arm, thumb and AArch64 targets) label Sep 21, 2024
@bjorn3
Copy link
Member

bjorn3 commented Sep 21, 2024

Removed the arm label given that it reproduces on x86_64 too.

@Noratrieb
Copy link
Member

(gdb) bt
#0  core::sync::atomic::atomic_load<*mut std::sys::sync::rwlock::queue::Node> () at /home/nora/other/rustc_codegen_cranelift/build/stdlib/library/core/src/sync/atomic.rs:3274
#1  0x0000565456a9f33f in std::sys::sync::rwlock::queue::{impl#0}::get () at /home/nora/other/rustc_codegen_cranelift/build/stdlib/library/core/src/sync/atomic.rs:1424
#2  0x0000565456a9f8f2 in std::sys::sync::rwlock::queue::add_backlinks_and_find_tail () at std/src/sys/sync/rwlock/queue.rs:254
#3  0x0000565456aa0330 in std::sys::sync::rwlock::queue::{impl#3}::unlock_queue () at std/src/sys/sync/rwlock/queue.rs:483
#4  0x0000565456aa024d in std::sys::sync::rwlock::queue::{impl#3}::unlock_contended () at std/src/sys/sync/rwlock/queue.rs:464
#5  0x0000565456a3e466 in <std::sync::rwlock::RwLockWriteGuard<T> as core::ops::drop::Drop>::drop ()
#6  0x0000565456a3b1fa in core::ptr::drop_in_place<std::sync::rwlock::RwLockWriteGuard<core::option::Option<alloc::string::String>>> ()
#7  0x0000565456a3ee9a in repro::main::{{closure}} ()
=> 0x0000565456a9e961 <+113>:   mov    (%rdi),%rax

it looks like the corrupted value here is the tail pointer of a node in the wait queue, accessed while trying to unlock the writer lock.
https://github.com/rust-lang/rust/blob/80aa6fa7314bddcfe95a68f4eb426b296bfdf4a9/library/std/src/sys/sync/rwlock/queue.rs#L254
the pointer that we're reading from (rdi) is 0x10. the tail is the 3rd pointer sized field in the Node, so this implies that our node pointer passed to add_backlinks_and_find_tail is null.
https://github.com/rust-lang/rust/blob/80aa6fa7314bddcfe95a68f4eb426b296bfdf4a9/library/std/src/sys/sync/rwlock/queue.rs#L483
the assembly of add_backlinks_and_find_tail backs this up, all it does is shuffling around its argument, offseting it by 0x10 and passing it to the function that reads it.

@Noratrieb
Copy link
Member

I copied the queue implementation into my own code and ran it: https://gist.github.com/Noratrieb/fc2733ff612e0dda6ce71553669fcb91 (and also changed it to 2 threads).
I immediately hit an unsafe precondition about creating a null NonNull. LLVM works perfectly fine.

Thread 0: Read value
thread '<unnamed>' panicked at core/src/panicking.rs:221:5:
unsafe precondition(s) violated: NonNull::new_unchecked requires that the pointer is non-null
stack backtrace:
   0: rust_begin_unwind
             at /home/nora/other/rustc_codegen_cranelift/build/stdlib/library/std/src/panicking.rs:665:5
   1: core::panicking::panic_nounwind_fmt
             at /home/nora/other/rustc_codegen_cranelift/build/stdlib/library/core/src/panicking.rs:112:18
   2: core::panicking::panic_nounwind
             at /home/nora/other/rustc_codegen_cranelift/build/stdlib/library/core/src/panicking.rs:221:5
   3: core::ptr::non_null::NonNull<T>::new_unchecked::precondition_check
   4: core::ptr::non_null::NonNull<T>::new_unchecked
   5: main::queue::to_node
   6: main::queue::RwLock::read_unlock_contended
   7: main::queue::RwLock::read_unlock
   8: main::main::{{closure}}
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
thread caused non-unwinding panic. aborting.
fish: Job 1, 'RUST_BACKTRACE=1 ./main' terminated by signal SIGABRT (Abort)

@bjorn3
Copy link
Member

bjorn3 commented Sep 22, 2024

I think the ptr_mask intrinsic requires bitwise-and with the inverse of the mask:

sym::ptr_mask => {
intrinsic_args!(fx, args => (ptr, mask); intrinsic);
let ptr = ptr.load_scalar(fx);
let mask = mask.load_scalar(fx);
fx.bcx.ins().band(ptr, mask);
}

@bjorn3
Copy link
Member

bjorn3 commented Sep 22, 2024

In particular currently the state.mask(MASK) in to_node would return the flags SINGLE | QUEUED rather than the pointer with the flags removed.

@Noratrieb
Copy link
Member

Noratrieb commented Sep 22, 2024

oh yeah. the ptr_mask doctest segfaults, so I guess that's a minimization^^:

     let mut v = 17_u32;
     let ptr: *mut u32 = &mut v;
    
     // `u32` is 4 bytes aligned,
     // which means that lower 2 bits are always 0.
     let tag_mask = 0b11;
     let ptr_mask = !tag_mask;
    
     // We can store something in these lower bits
     let tagged_ptr = ptr.map_addr(|a| a | 0b10);
    
     // Get the "tag" back
     let tag = tagged_ptr.addr() & tag_mask;
     assert_eq!(tag, 0b10);
    
     // Note that `tagged_ptr` is unaligned, it's UB to read from/write to it.
     // To get original pointer `mask` can be used:
     let masked_ptr = tagged_ptr.mask(ptr_mask);
     assert_eq!(unsafe { *masked_ptr }, 17);
    
     unsafe { *masked_ptr = 0 };
     assert_eq!(v, 0);

@Noratrieb
Copy link
Member

more minimal example:

    let mut v = 17_u32;
    let ptr: *mut u32 = &mut v;
    let masked_ptr = ptr.mask(!0b11);    
    unsafe { masked_ptr.read_volatile() };

@Noratrieb
Copy link
Member

0000000000015220 <_ZN4core3ptr7mut_ptr31_$LT$impl$u20$$BP$mut$u20$T$GT$4mask17h602a34ac5952876eE>:
   15220:	55                   	push   rbp
   15221:	48 89 e5             	mov    rbp,rsp
   15224:	48 31 c0             	xor    rax,rax
   15227:	48 89 ec             	mov    rsp,rbp
   1522a:	5d                   	pop    rbp
   1522b:	c3                   	ret

that's a quite creative way to implement pointer masking.

@bjorn3
Copy link
Member

bjorn3 commented Sep 22, 2024

I think the ptr_mask intrinsic requires bitwise-and with the inverse of the mask:

No, the currently implementation is actually fine. The issue is a missing write of the return value like

ret.write_cvalue(fx, CValue::by_val(res, base.layout()));

@Noratrieb
Copy link
Member

Yeah, I've fixed that locally and it works now. I'll put up a PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
C-bug Category: This is a bug.
Projects
None yet
Development

Successfully merging a pull request may close this issue.

3 participants