Auto merge of rust-lang#95855 - Dylan-DPC:rollup-h45xmpw, r=Dylan-DPC

Rollup of 7 pull requests Successful merges: - rust-lang#94794 (Clarify indexing into Strings) - rust-lang#95361 (Make non-power-of-two alignments a validity error in `Layout`) - rust-lang#95369 (Fix `x test src/librustdoc` with `download-rustc` enabled ) - rust-lang#95805 (Left overs of rust-lang#95761) - rust-lang#95808 (expand: Remove `ParseSess::missing_fragment_specifiers`) - rust-lang#95817 (hide another #[allow] directive from a docs example) - rust-lang#95831 (Use bitwise XOR in to_ascii_uppercase) Failed merges: r? `@ghost` `@rustbot` modify labels: rollup
rust-lang-ci · Apr 9, 2022 · 8bf93e9 · 8bf93e9
2 parents 8c1fb2e + 7726265
commit 8bf93e9
Show file tree

Hide file tree

Showing 26 changed files with 569 additions and 95 deletions.
diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs
@@ -2,7 +2,6 @@
 #![feature(associated_type_bounds)]
 #![feature(associated_type_defaults)]
 #![feature(crate_visibility_modifier)]
-#![feature(decl_macro)]
 #![feature(if_let_guard)]
 #![feature(let_chains)]
 #![feature(let_else)]

diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs
@@ -110,7 +110,7 @@ use rustc_ast::token::{DelimToken, Token, TokenKind};
 use rustc_ast::{NodeId, DUMMY_NODE_ID};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_errors::MultiSpan;
-use rustc_session::lint::builtin::META_VARIABLE_MISUSE;
+use rustc_session::lint::builtin::{META_VARIABLE_MISUSE, MISSING_FRAGMENT_SPECIFIER};
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::kw;
 use rustc_span::{symbol::MacroRulesNormalizedIdent, Span};
@@ -261,7 +261,18 @@ fn check_binders(
             }
         }
         // Similarly, this can only happen when checking a toplevel macro.
-        TokenTree::MetaVarDecl(span, name, _kind) => {
+        TokenTree::MetaVarDecl(span, name, kind) => {
+            if kind.is_none() && node_id != DUMMY_NODE_ID {
+                // FIXME: Report this as a hard error eventually and remove equivalent errors from
+                // `parse_tt_inner` and `nameize`. Until then the error may be reported twice, once
+                // as a hard error and then once as a buffered lint.
+                sess.buffer_lint(
+                    MISSING_FRAGMENT_SPECIFIER,
+                    span,
+                    node_id,
+                    &format!("missing fragment specifier"),
+                );
+            }
             if !macros.is_empty() {
                 sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs");
             }

diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -411,7 +411,6 @@ impl TtParser {
     /// track of through the mps generated.
     fn parse_tt_inner(
         &mut self,
-        sess: &ParseSess,
         matcher: &[MatcherLoc],
         token: &Token,
     ) -> Option<NamedParseResult> {
@@ -519,11 +518,9 @@ impl TtParser {
                             self.bb_mps.push(mp);
                         }
                     } else {
+                        // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
                         // Both this check and the one in `nameize` are necessary, surprisingly.
-                        if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
-                            // E.g. `$e` instead of `$e:expr`.
-                            return Some(Error(span, "missing fragment specifier".to_string()));
-                        }
+                        return Some(Error(span, "missing fragment specifier".to_string()));
                     }
                 }
                 MatcherLoc::Eof => {
@@ -549,7 +546,7 @@ impl TtParser {
                     // Need to take ownership of the matches from within the `Lrc`.
                     Lrc::make_mut(&mut eof_mp.matches);
                     let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
-                    self.nameize(sess, matcher, matches)
+                    self.nameize(matcher, matches)
                 }
                 EofMatcherPositions::Multiple => {
                     Error(token.span, "ambiguity: multiple successful parses".to_string())
@@ -587,7 +584,7 @@ impl TtParser {
 
             // Process `cur_mps` until either we have finished the input or we need to get some
             // parsing from the black-box parser done.
-            if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
+            if let Some(res) = self.parse_tt_inner(matcher, &parser.token) {
                 return res;
             }
 
@@ -694,7 +691,6 @@ impl TtParser {
 
     fn nameize<I: Iterator<Item = NamedMatch>>(
         &self,
-        sess: &ParseSess,
         matcher: &[MatcherLoc],
         mut res: I,
     ) -> NamedParseResult {
@@ -711,11 +707,9 @@ impl TtParser {
                         }
                     };
                 } else {
+                    // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
                     // Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
-                    if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
-                        // E.g. `$e` instead of `$e:expr`.
-                        return Error(span, "missing fragment specifier".to_string());
-                    }
+                    return Error(span, "missing fragment specifier".to_string());
                 }
             }
         }

diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs
@@ -2,8 +2,7 @@ use crate::mbe::macro_parser::count_metavar_decls;
 use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree};
 
 use rustc_ast::token::{self, Token};
-use rustc_ast::tokenstream;
-use rustc_ast::{NodeId, DUMMY_NODE_ID};
+use rustc_ast::{tokenstream, NodeId};
 use rustc_ast_pretty::pprust;
 use rustc_feature::Features;
 use rustc_session::parse::{feature_err, ParseSess};
@@ -104,10 +103,7 @@ pub(super) fn parse(
                     }
                     tree => tree.as_ref().map_or(start_sp, tokenstream::TokenTree::span),
                 };
-                if node_id != DUMMY_NODE_ID {
-                    // Macros loaded from other crates have dummy node ids.
-                    sess.missing_fragment_specifiers.borrow_mut().insert(span, node_id);
-                }
+
                 result.push(TokenTree::MetaVarDecl(span, ident, None));
             }
 

diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs
@@ -30,7 +30,6 @@ use rustc_resolve::{Resolver, ResolverArenas};
 use rustc_serialize::json;
 use rustc_session::config::{CrateType, Input, OutputFilenames, OutputType};
 use rustc_session::cstore::{MetadataLoader, MetadataLoaderDyn};
-use rustc_session::lint;
 use rustc_session::output::{filename_for_input, filename_for_metadata};
 use rustc_session::search_paths::PathKind;
 use rustc_session::{Limit, Session};
@@ -349,23 +348,8 @@ pub fn configure_and_expand(
             ecx.check_unused_macros();
         });
 
-        let mut missing_fragment_specifiers: Vec<_> = ecx
-            .sess
-            .parse_sess
-            .missing_fragment_specifiers
-            .borrow()
-            .iter()
-            .map(|(span, node_id)| (*span, *node_id))
-            .collect();
-        missing_fragment_specifiers.sort_unstable_by_key(|(span, _)| *span);
-
         let recursion_limit_hit = ecx.reduced_recursion_limit.is_some();
 
-        for (span, node_id) in missing_fragment_specifiers {
-            let lint = lint::builtin::MISSING_FRAGMENT_SPECIFIER;
-            let msg = "missing fragment specifier";
-            resolver.lint_buffer().buffer_lint(lint, node_id, span, msg);
-        }
         if cfg!(windows) {
             env::set_var("PATH", &old_path);
         }

diff --git a/compiler/rustc_session/src/parse.rs b/compiler/rustc_session/src/parse.rs
@@ -140,7 +140,6 @@ pub struct ParseSess {
     pub config: CrateConfig,
     pub check_config: CrateCheckConfig,
     pub edition: Edition,
-    pub missing_fragment_specifiers: Lock<FxHashMap<Span, NodeId>>,
     /// Places where raw identifiers were used. This is used to avoid complaining about idents
     /// clashing with keywords in new editions.
     pub raw_identifier_spans: Lock<Vec<Span>>,
@@ -195,7 +194,6 @@ impl ParseSess {
             config: FxHashSet::default(),
             check_config: CrateCheckConfig::default(),
             edition: ExpnId::root().expn_data().edition,
-            missing_fragment_specifiers: Default::default(),
             raw_identifier_spans: Lock::new(Vec::new()),
             bad_unicode_identifiers: Lock::new(Default::default()),
             source_map,

diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
@@ -393,7 +393,7 @@ impl<T> Rc<T> {
     /// # Examples
     ///
     /// ```
-    /// #![allow(dead_code)]
+    /// # #![allow(dead_code)]
     /// use std::rc::{Rc, Weak};
     ///
     /// struct Gadget {

diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
@@ -117,27 +117,99 @@ use crate::vec::Vec;
 ///
 /// # UTF-8
 ///
-/// `String`s are always valid UTF-8. This has a few implications, the first of
-/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
-/// similar, but without the UTF-8 constraint. The second implication is that
-/// you cannot index into a `String`:
+/// `String`s are always valid UTF-8. If you need a non-UTF-8 string, consider
+/// [`OsString`]. It is similar, but without the UTF-8 constraint. Because UTF-8
+/// is a variable width encoding, `String`s are typically smaller than an array of
+/// the same `chars`:
+///
+/// ```
+/// use std::mem;
+///
+/// // `s` is ASCII which represents each `char` as one byte
+/// let s = "hello";
+/// assert_eq!(s.len(), 5);
+///
+/// // A `char` array with the same contents would be longer because
+/// // every `char` is four bytes
+/// let s = ['h', 'e', 'l', 'l', 'o'];
+/// let size: usize = s.into_iter().map(|c| mem::size_of_val(&c)).sum();
+/// assert_eq!(size, 20);
+///
+/// // However, for non-ASCII strings, the difference will be smaller
+/// // and sometimes they are the same
+/// let s = "💖💖💖💖💖";
+/// assert_eq!(s.len(), 20);
+///
+/// let s = ['💖', '💖', '💖', '💖', '💖'];
+/// let size: usize = s.into_iter().map(|c| mem::size_of_val(&c)).sum();
+/// assert_eq!(size, 20);
+/// ```
+///
+/// This raises interesting questions as to how `s[i]` should work.
+/// What should `i` be here? Several options include byte indices and
+/// `char` indices but, because of UTF-8 encoding, only byte indices
+/// would provide constant time indexing. Getting the `i`th `char`, for
+/// example, is available using [`chars`]:
+///
+/// ```
+/// let s = "hello";
+/// let third_character = s.chars().nth(2);
+/// assert_eq!(third_character, Some('l'));
+///
+/// let s = "💖💖💖💖💖";
+/// let third_character = s.chars().nth(2);
+/// assert_eq!(third_character, Some('💖'));
+/// ```
+///
+/// Next, what should `s[i]` return? Because indexing returns a reference
+/// to underlying data it could be `&u8`, `&[u8]`, or something else similar.
+/// Since we're only providing one index, `&u8` makes the most sense but that
+/// might not be what the user expects and can be explicitly achieved with
+/// [`as_bytes()`]:
+///
+/// ```
+/// // The first byte is 104 - the byte value of `'h'`
+/// let s = "hello";
+/// assert_eq!(s.as_bytes()[0], 104);
+/// // or
+/// assert_eq!(s.as_bytes()[0], b'h');
+///
+/// // The first byte is 240 which isn't obviously useful
+/// let s = "💖💖💖💖💖";
+/// assert_eq!(s.as_bytes()[0], 240);
+/// ```
+///
+/// Due to these ambiguities/restrictions, indexing with a `usize` is simply
+/// forbidden:
 ///
 /// ```compile_fail,E0277
 /// let s = "hello";
 ///
-/// println!("The first letter of s is {}", s[0]); // ERROR!!!
+/// // The following will not compile!
+/// println!("The first letter of s is {}", s[0]);
 /// ```
 ///
+/// It is more clear, however, how `&s[i..j]` should work (that is,
+/// indexing with a range). It should accept byte indices (to be constant-time)
+/// and return a `&str` which is UTF-8 encoded. This is also called "string slicing".
+/// Note this will panic if the byte indices provided are not character
+/// boundaries - see [`is_char_boundary`] for more details. See the implementations
+/// for [`SliceIndex<str>`] for more details on string slicing. For a non-panicking
+/// version of string slicing, see [`get`].
+///
 /// [`OsString`]: ../../std/ffi/struct.OsString.html "ffi::OsString"
+/// [`SliceIndex<str>`]: core::slice::SliceIndex
+/// [`as_bytes()`]: str::as_bytes
+/// [`get`]: str::get
+/// [`is_char_boundary`]: str::is_char_boundary
 ///
-/// Indexing is intended to be a constant-time operation, but UTF-8 encoding
-/// does not allow us to do this. Furthermore, it's not clear what sort of
-/// thing the index should return: a byte, a codepoint, or a grapheme cluster.
-/// The [`bytes`] and [`chars`] methods return iterators over the first
-/// two, respectively.
+/// The [`bytes`] and [`chars`] methods return iterators over the bytes and
+/// codepoints of the string, respectively. To iterate over codepoints along
+/// with byte indices, use [`char_indices`].
 ///
 /// [`bytes`]: str::bytes
 /// [`chars`]: str::chars
+/// [`char_indices`]: str::char_indices
 ///
 /// # Deref
 ///

diff --git a/library/core/src/alloc/layout.rs b/library/core/src/alloc/layout.rs
@@ -1,7 +1,6 @@
 use crate::cmp;
 use crate::fmt;
-use crate::mem;
-use crate::num::NonZeroUsize;
+use crate::mem::{self, ValidAlign};
 use crate::ptr::NonNull;
 
 // While this function is used in one place and its implementation
@@ -40,7 +39,7 @@ pub struct Layout {
     //
     // (However, we do not analogously require `align >= sizeof(void*)`,
     //  even though that is *also* a requirement of `posix_memalign`.)
-    align_: NonZeroUsize,
+    align_: ValidAlign,
 }
 
 impl Layout {
@@ -97,8 +96,8 @@ impl Layout {
     #[must_use]
     #[inline]
     pub const unsafe fn from_size_align_unchecked(size: usize, align: usize) -> Self {
-        // SAFETY: the caller must ensure that `align` is greater than zero.
-        Layout { size_: size, align_: unsafe { NonZeroUsize::new_unchecked(align) } }
+        // SAFETY: the caller must ensure that `align` is a power of two.
+        Layout { size_: size, align_: unsafe { ValidAlign::new_unchecked(align) } }
     }
 
     /// The minimum size in bytes for a memory block of this layout.
@@ -117,7 +116,7 @@ impl Layout {
                   without modifying the layout"]
     #[inline]
     pub const fn align(&self) -> usize {
-        self.align_.get()
+        self.align_.as_nonzero().get()
     }
 
     /// Constructs a `Layout` suitable for holding a value of type `T`.

diff --git a/library/core/src/mem/mod.rs b/library/core/src/mem/mod.rs
@@ -21,6 +21,12 @@ mod maybe_uninit;
 #[stable(feature = "maybe_uninit", since = "1.36.0")]
 pub use maybe_uninit::MaybeUninit;
 
+mod valid_align;
+// For now this type is left crate-local.  It could potentially make sense to expose
+// it publicly, as it would be a nice parameter type for methods which need to take
+// alignment as a parameter, such as `Layout::padding_needed_for`.
+pub(crate) use valid_align::ValidAlign;
+
 #[stable(feature = "rust1", since = "1.0.0")]
 #[doc(inline)]
 pub use crate::intrinsics::transmute;