From 57f570bb3304402038c906fd6bc79d0f42cfab26 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 3 Nov 2023 07:23:24 +0100 Subject: [PATCH 1/2] clarify that the str invariant is a safety, not validity, invariant --- library/core/src/primitive_docs.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/library/core/src/primitive_docs.rs b/library/core/src/primitive_docs.rs index f3695d16d7a6b..a0caa6c8216cb 100644 --- a/library/core/src/primitive_docs.rs +++ b/library/core/src/primitive_docs.rs @@ -291,7 +291,7 @@ mod prim_never {} /// Surrogate code points, used by UTF-16, are in the range 0xD800 to 0xDFFF. /// /// No `char` may be constructed, whether as a literal or at runtime, that is not a -/// Unicode scalar value: +/// Unicode scalar value. Violating this rule causes Undefined Behavior. /// /// ```compile_fail /// // Each of these is a compiler error @@ -308,9 +308,10 @@ mod prim_never {} /// let _ = unsafe { char::from_u32_unchecked(0x110000) }; /// ``` /// -/// USVs are also the exact set of values that may be encoded in UTF-8. Because -/// `char` values are USVs and `str` values are valid UTF-8, it is safe to store -/// any `char` in a `str` or read any character from a `str` as a `char`. +/// USVs are also the exact set of values that may be encoded in UTF-8. Because `char` values are +/// USVs and functions may assume [incoming `str` values are valid +/// UTF-8](primitive.str.html#invariant), it is safe to store any `char` in a `str` or read any +/// character from a `str` as a `char`. /// /// The gap in valid `char` values is understood by the compiler, so in the /// below example the two ranges are understood to cover the whole range of @@ -887,8 +888,6 @@ mod prim_slice {} /// type. It is usually seen in its borrowed form, `&str`. It is also the type /// of string literals, `&'static str`. /// -/// String slices are always valid UTF-8. 
-/// /// # Basic Usage /// /// String literals are string slices: @@ -942,6 +941,14 @@ mod prim_slice {} /// Note: This example shows the internals of `&str`. `unsafe` should not be /// used to get a string slice under normal circumstances. Use `as_str` /// instead. +/// +/// # Invariant +/// +/// Rust libraries may assume that string slices are always valid UTF-8. +/// +/// Constructing a non-UTF-8 string slice is not immediate Undefined Behavior, but any function +/// called on a string slice may assume that it is valid UTF-8, which means that a non-UTF-8 string +/// slice can lead to Undefined Behavior down the road. #[stable(feature = "rust1", since = "1.0.0")] mod prim_str {} From 0550ba5f7743e2d2a979ff41f098283f5ad68000 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 3 Nov 2023 07:58:03 +0100 Subject: [PATCH 2/2] avoid acronyms when we don't really need them --- library/core/src/primitive_docs.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/library/core/src/primitive_docs.rs b/library/core/src/primitive_docs.rs index a0caa6c8216cb..8c7c44ca8d48e 100644 --- a/library/core/src/primitive_docs.rs +++ b/library/core/src/primitive_docs.rs @@ -291,7 +291,7 @@ mod prim_never {} /// Surrogate code points, used by UTF-16, are in the range 0xD800 to 0xDFFF. /// /// No `char` may be constructed, whether as a literal or at runtime, that is not a -/// Unicode scalar value. Violating this rule causes Undefined Behavior. +/// Unicode scalar value. Violating this rule causes undefined behavior. /// /// ```compile_fail /// // Each of these is a compiler error @@ -308,10 +308,10 @@ mod prim_never {} /// let _ = unsafe { char::from_u32_unchecked(0x110000) }; /// ``` /// -/// USVs are also the exact set of values that may be encoded in UTF-8.
Because `char` values are -/// USVs and functions may assume [incoming `str` values are valid -/// UTF-8](primitive.str.html#invariant), it is safe to store any `char` in a `str` or read any -/// character from a `str` as a `char`. +/// Unicode scalar values are also the exact set of values that may be encoded in UTF-8. Because +/// `char` values are Unicode scalar values and functions may assume [incoming `str` values are +/// valid UTF-8](primitive.str.html#invariant), it is safe to store any `char` in a `str` or read +/// any character from a `str` as a `char`. /// /// The gap in valid `char` values is understood by the compiler, so in the /// below example the two ranges are understood to cover the whole range of @@ -325,11 +325,10 @@ mod prim_never {} /// }; /// ``` /// -/// All USVs are valid `char` values, but not all of them represent a real -/// character. Many USVs are not currently assigned to a character, but may be -/// in the future ("reserved"); some will never be a character -/// ("noncharacters"); and some may be given different meanings by different -/// users ("private use"). +/// All Unicode scalar values are valid `char` values, but not all of them represent a real +/// character. Many Unicode scalar values are not currently assigned to a character, but may be in +/// the future ("reserved"); some will never be a character ("noncharacters"); and some may be given +/// different meanings by different users ("private use"). /// /// [Unicode code point]: https://www.unicode.org/glossary/#code_point /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value @@ -946,9 +945,9 @@ mod prim_slice {} /// /// Rust libraries may assume that string slices are always valid UTF-8.
/// -/// Constructing a non-UTF-8 string slice is not immediate Undefined Behavior, but any function +/// Constructing a non-UTF-8 string slice is not immediate undefined behavior, but any function /// called on a string slice may assume that it is valid UTF-8, which means that a non-UTF-8 string -/// slice can lead to Undefined Behaviior down the road. +/// slice can lead to undefined behavior down the road. #[stable(feature = "rust1", since = "1.0.0")] mod prim_str {}