diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs index f43a19d91b657..dec886208103d 100644 --- a/library/std/src/sys/windows/c.rs +++ b/library/std/src/sys/windows/c.rs @@ -975,6 +975,7 @@ extern "system" { pub fn freeaddrinfo(res: *mut ADDRINFOA); pub fn GetProcAddress(handle: HMODULE, name: LPCSTR) -> *mut c_void; + pub fn GetModuleHandleA(lpModuleName: LPCSTR) -> HMODULE; pub fn GetModuleHandleW(lpModuleName: LPCWSTR) -> HMODULE; pub fn GetSystemTimeAsFileTime(lpSystemTimeAsFileTime: LPFILETIME); diff --git a/library/std/src/sys/windows/compat.rs b/library/std/src/sys/windows/compat.rs index e9588e2975825..017a4bbe97cc5 100644 --- a/library/std/src/sys/windows/compat.rs +++ b/library/std/src/sys/windows/compat.rs @@ -1,93 +1,116 @@ -//! A "compatibility layer" for spanning XP and Windows 7 +//! A "compatibility layer" for supporting older versions of Windows //! -//! The standard library currently binds many functions that are not available -//! on Windows XP, but we would also like to support building executables that -//! run on XP. To do this we specify all non-XP APIs as having a fallback -//! implementation to do something reasonable. +//! The standard library uses some Windows API functions that are not present +//! on older versions of Windows. (Note that the oldest version of Windows +//! that Rust supports is Windows 7 (client) and Windows Server 2008 (server).) +//! This module implements a form of delayed DLL import binding, using +//! `GetModuleHandle` and `GetProcAddress` to look up DLL entry points at +//! runtime. //! -//! This dynamic runtime detection of whether a function is available is -//! implemented with `GetModuleHandle` and `GetProcAddress` paired with a -//! static-per-function which caches the result of the first check. In this -//! manner we pay a semi-large one-time cost up front for detecting whether a -//! function is available but afterwards it's just a load and a jump. 
- -use crate::ffi::CString; -use crate::sys::c; - -pub fn lookup(module: &str, symbol: &str) -> Option { - let mut module: Vec = module.encode_utf16().collect(); - module.push(0); - let symbol = CString::new(symbol).unwrap(); - unsafe { - let handle = c::GetModuleHandleW(module.as_ptr()); - match c::GetProcAddress(handle, symbol.as_ptr()) as usize { - 0 => None, - n => Some(n), - } - } -} +//! This implementation uses a static initializer to look up the DLL entry +//! points. The CRT (C runtime) executes static initializers before `main` +//! is called (for binaries) and before `DllMain` is called (for DLLs). +//! This is the ideal time to look up DLL imports, because we are guaranteed +//! that no other threads will attempt to call these entry points. Thus, +//! we can look up the imports and store them in `static mut` fields +//! without any synchronization. +//! +//! This has an additional advantage: Because the DLL import lookup happens +//! at module initialization, the cost of these lookups is deterministic, +//! and is removed from the code paths that actually call the DLL imports. +//! That is, there is no unpredictable "cache miss" that occurs when calling +//! a DLL import. For applications that benefit from predictable delays, +//! this is a benefit. This also eliminates the comparison-and-branch +//! from the hot path. +//! +//! Currently, the standard library uses only a small number of dynamic +//! DLL imports. If this number grows substantially, then the cost of +//! performing all of the lookups at initialization time might become +//! substantial. +//! +//! The mechanism of registering a static initializer with the CRT is +//! documented in +//! [CRT Initialization](https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization?view=msvc-160). +//! It works by contributing a global symbol to the `.CRT$XCU` section. +//! The linker builds a table of all static initializer functions. +//! 
The CRT startup code then iterates that table, calling each +//! initializer function. +//! +//! # **WARNING!!** +//! The environment that a static initializer function runs in is highly +//! constrained. There are **many** restrictions on what static initializers +//! can safely do. Static initializer functions **MUST NOT** do any of the +//! following (this list is not comprehensive): +//! * touch any other static field that is used by a different static +//! initializer, because the order that static initializers run in +//! is not defined. +//! * call `LoadLibrary` or any other function that acquires the DLL +//! loader lock. +//! * call any Rust function or CRT function that touches any static +//! (global) state. macro_rules! compat_fn { ($module:literal: $( $(#[$meta:meta])* - pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty $body:block + pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty $fallback_body:block )*) => ($( $(#[$meta])* pub mod $symbol { #[allow(unused_imports)] use super::*; - use crate::sync::atomic::{AtomicUsize, Ordering}; use crate::mem; type F = unsafe extern "system" fn($($argtype),*) -> $rettype; - static PTR: AtomicUsize = AtomicUsize::new(0); - - #[allow(unused_variables)] - unsafe extern "system" fn fallback($($argname: $argtype),*) -> $rettype $body - - /// This address is stored in `PTR` to incidate an unavailable API. - /// - /// This way, call() will end up calling fallback() if it is unavailable. - /// - /// This is a `static` to avoid rustc duplicating `fn fallback()` - /// into both load() and is_available(), which would break - /// is_available()'s comparison. By using the same static variable - /// in both places, they'll refer to the same (copy of the) - /// function. + /// Points to the DLL import, or the fallback function.
/// - /// LLVM merging the address of fallback with other functions - /// (because of unnamed_addr) is fine, since it's only compared to - /// an address from GetProcAddress from an external dll. - static FALLBACK: F = fallback; + /// This static can be an ordinary, unsynchronized, mutable static because + /// we guarantee that all of the writes finish during CRT initialization, + /// and all of the reads occur after CRT initialization. + static mut PTR: Option = None; - #[cold] - fn load() -> usize { - // There is no locking here. It's okay if this is executed by multiple threads in - // parallel. `lookup` will result in the same value, and it's okay if they overwrite - // eachothers result as long as they do so atomically. We don't need any guarantees - // about memory ordering, as this involves just a single atomic variable which is - // not used to protect or order anything else. - let addr = crate::sys::compat::lookup($module, stringify!($symbol)) - .unwrap_or(FALLBACK as usize); - PTR.store(addr, Ordering::Relaxed); - addr - } + /// This symbol is what allows the CRT to find the `init` function and call it. + /// It is marked `#[used]` because otherwise Rust would assume that it was not + /// used, and would remove it. + #[used] + #[link_section = ".CRT$XCU"] + static INIT_TABLE_ENTRY: fn() = init; - fn addr() -> usize { - match PTR.load(Ordering::Relaxed) { - 0 => load(), - addr => addr, + fn init() { + // There is no locking here. This code is executed before main() is entered, and + // is guaranteed to be single-threaded. + // + // DO NOT do anything interesting or complicated in this function! DO NOT call + // any Rust functions or CRT functions, if those functions touch any global state, + // because this function runs during global initialization. For example, DO NOT + // do any dynamic allocation, don't call LoadLibrary, etc. 
+ unsafe { + let module_name: *const u8 = concat!($module, "\0").as_ptr(); + let symbol_name: *const u8 = concat!(stringify!($symbol), "\0").as_ptr(); + let module_handle = $crate::sys::c::GetModuleHandleA(module_name as *const i8); + if !module_handle.is_null() { + match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8) as usize { + 0 => {} + n => { + PTR = Some(mem::transmute::(n)); + } + } + } } } #[allow(dead_code)] - pub fn is_available() -> bool { - addr() != FALLBACK as usize + pub fn option() -> Option { + unsafe { PTR } } + #[allow(dead_code)] pub unsafe fn call($($argname: $argtype),*) -> $rettype { - mem::transmute::(addr())($($argname),*) + if let Some(ptr) = PTR { + ptr($($argname),*) + } else { + $fallback_body + } } } diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs index 9e4c9aa0a512c..4f59d4dd452be 100644 --- a/library/std/src/sys/windows/thread_parker.rs +++ b/library/std/src/sys/windows/thread_parker.rs @@ -108,10 +108,10 @@ impl Parker { return; } - if c::WaitOnAddress::is_available() { + if let Some(wait_on_address) = c::WaitOnAddress::option() { loop { // Wait for something to happen, assuming it's still set to PARKED. - c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE); + wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE); // Change NOTIFIED=>EMPTY but leave PARKED alone. if self.state.compare_exchange(NOTIFIED, EMPTY, Acquire, Acquire).is_ok() { // Actually woken up by unpark(). @@ -140,9 +140,9 @@ impl Parker { return; } - if c::WaitOnAddress::is_available() { + if let Some(wait_on_address) = c::WaitOnAddress::option() { // Wait for something to happen, assuming it's still set to PARKED. 
- c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout)); + wait_on_address(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout)); // Set the state back to EMPTY (from either PARKED or NOTIFIED). // Note that we don't just write EMPTY, but use swap() to also // include an acquire-ordered read to synchronize with unpark()'s @@ -192,9 +192,9 @@ impl Parker { // purpose, to make sure every unpark() has a release-acquire ordering // with park(). if self.state.swap(NOTIFIED, Release) == PARKED { - if c::WakeByAddressSingle::is_available() { + if let Some(wake_by_address_single) = c::WakeByAddressSingle::option() { unsafe { - c::WakeByAddressSingle(self.ptr()); + wake_by_address_single(self.ptr()); } } else { // If we run NtReleaseKeyedEvent before the waiting thread runs