From 76c71448961645b9708e4e4890422f5f483dfa3a Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Fri, 7 Feb 2020 14:02:24 -0500 Subject: [PATCH] Properly handle Spans that reference imported SourceFiles Previously, metadata encoding used DUMMY_SP to represent any spans that referenced an 'imported' SourceFile - e.g. a SourceFile from an upstream dependency. These leads to sub-optimal error messages in certain cases (see the included test). This PR changes how we encode and decode spans in crate metadata. We encode spans in one of two ways: * 'Local' spans, which reference non-imported SourceFiles, are encoded exactly as before. * 'Foreign' spans, which reference imported SourceFiles, are encoded with the CrateNum of their 'originating' crate. Additionally, their 'lo' and 'high' values are rebased on top of the 'originating' crate, which allows them to be used with the SourceMap data encoded for that crate. The `ExternalSource` enum is renamed to `ExternalSourceKind`. There is now a struct called `ExternalSource`, which holds an `ExternalSourceKind` along with the original line number information for the file. This is used during `Span` serialization to rebase spans onto their 'owning' crate. --- src/librustc/hir/map/collector.rs | 4 +- src/librustc/ich/impls_syntax.rs | 8 +- src/librustc_metadata/rmeta/decoder.rs | 91 +++++++++++++++++-- src/librustc_metadata/rmeta/encoder.rs | 58 +++++++++--- src/librustc_metadata/rmeta/mod.rs | 5 +- src/librustc_span/lib.rs | 61 ++++++++----- src/librustc_span/source_map.rs | 12 ++- .../ui/span/auxiliary/transitive_dep_three.rs | 9 ++ .../ui/span/auxiliary/transitive_dep_two.rs | 3 + src/test/ui/span/transitive-dep-span.rs | 13 +++ src/test/ui/span/transitive-dep-span.stderr | 19 ++++ 11 files changed, 233 insertions(+), 50 deletions(-) create mode 100644 src/test/ui/span/auxiliary/transitive_dep_three.rs create mode 100644 src/test/ui/span/auxiliary/transitive_dep_two.rs create mode 100644 src/test/ui/span/transitive-dep-span.rs create mode 100644 src/test/ui/span/transitive-dep-span.stderr diff --git a/src/librustc/hir/map/collector.rs b/src/librustc/hir/map/collector.rs index ebd335ab35504..b2045ddf45db5 100644 --- a/src/librustc/hir/map/collector.rs +++ b/src/librustc/hir/map/collector.rs @@ -10,7 +10,7 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::svh::Svh; use rustc_hir as hir; use rustc_hir::def_id::CRATE_DEF_INDEX; -use rustc_hir::def_id::{CrateNum, DefIndex, LOCAL_CRATE}; +use rustc_hir::def_id::{DefIndex, LOCAL_CRATE}; use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor}; use rustc_hir::*; use rustc_index::vec::IndexVec; @@ -213,7 +213,7 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> { .source_map .files() .iter() - .filter(|source_file| CrateNum::from_u32(source_file.crate_of_origin) == LOCAL_CRATE) + .filter(|source_file| source_file.cnum == LOCAL_CRATE) .map(|source_file| source_file.name_hash) .collect(); diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index daff8a0f1825e..c5a4b53b10df8 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -5,7 +5,6 @@ use crate::ich::StableHashingContext; use rustc_ast::ast; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; -use rustc_hir::def_id::{CrateNum, DefId, CRATE_DEF_INDEX}; use rustc_span::SourceFile; use smallvec::SmallVec; @@ -59,7 +58,7 @@ impl<'a> HashStable> for SourceFile { name_hash, name_was_remapped, unmapped_path: _, - crate_of_origin, + cnum, // Do not hash the source as it is not encoded src: _, src_hash, @@ -75,9 +74,6 @@ impl<'a> HashStable> for SourceFile { (name_hash as u64).hash_stable(hcx, hasher); name_was_remapped.hash_stable(hcx, hasher); - DefId { krate: CrateNum::from_u32(crate_of_origin), index: CRATE_DEF_INDEX } - .hash_stable(hcx, hasher); - src_hash.hash_stable(hcx, hasher); // We only hash the relative position within this source_file @@ -101,6 +97,8 @@ impl<'a> HashStable> for SourceFile { for &char_pos in normalized_pos.iter() { stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher); } + + cnum.hash_stable(hcx, hasher); } } diff --git a/src/librustc_metadata/rmeta/decoder.rs b/src/librustc_metadata/rmeta/decoder.rs index 1d8eb0cde468a..eb4753740f208 100644 --- a/src/librustc_metadata/rmeta/decoder.rs +++ b/src/librustc_metadata/rmeta/decoder.rs @@ -387,7 +387,7 @@ impl<'a, 'tcx> SpecializedDecoder for DecodeContext<'a, 'tcx> { return Ok(DUMMY_SP); } - debug_assert_eq!(tag, TAG_VALID_SPAN); + debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN); let lo = BytePos::decode(self)?; let len = BytePos::decode(self)?; @@ -399,7 +399,68 @@ impl<'a, 'tcx> SpecializedDecoder for DecodeContext<'a, 'tcx> { bug!("Cannot decode Span without Session.") }; - let imported_source_files = self.cdata().imported_source_files(&sess.source_map()); + // There are two possibilities here: + // 1. This is a 'local span', which is located inside a `SourceFile` + // that came from this crate. In this case, we use the source map data + // encoded in this crate. This branch should be taken nearly all of the time. + // 2. This is a 'foreign span', which is located inside a `SourceFile` + // that came from a *different* crate (some crate upstream of the one + // whose metadata we're looking at). For example, consider this dependency graph: + // + // A -> B -> C + // + // Suppose that we're currently compiling crate A, and start deserializing + // metadata from crate B. When we deserialize a Span from crate B's metadata, + // there are two posibilites: + // + // 1. The span references a file from crate B. This makes it a 'local' span, + // which means that we can use crate B's serialized source map information. + // 2. The span references a file from crate C. This makes it a 'foreign' span, + // which means we need to use Crate *C* (not crate B) to determine the source + // map information. We only record source map information for a file in the + // crate that 'owns' it, so deserializing a Span may require us to look at + // a transitive dependency. + // + // When we encode a foreign span, we adjust its 'lo' and 'high' values + // to be based on the *foreign* crate (e.g. crate C), not the crate + // we are writing metadata for (e.g. crate B). This allows us to + // treat the 'local' and 'foreign' cases almost identically during deserialization: + // we can call `imported_source_files` for the proper crate, and binary search + // through the returned slice using our span. + let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL { + self.cdata().imported_source_files(sess.source_map()) + } else { + // FIXME: We don't decode dependencies of proc-macros. + // Remove this once #69976 is merged + if self.cdata().root.is_proc_macro_crate() { + debug!( + "SpecializedDecoder::specialized_decode: skipping span for proc-macro crate {:?}", + self.cdata().cnum + ); + // Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE + // since we don't have `cnum_map` populated. + // This advances the decoder position so that we can continue + // to read metadata. + let _ = u32::decode(self)?; + return Ok(DUMMY_SP); + } + // tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above + let cnum = CrateNum::decode(self)?; + debug!( + "SpecializedDecoder::specialized_decode: loading source files from cnum {:?}", + cnum + ); + + // Decoding 'foreign' spans should be rare enough that it's + // not worth it to maintain a per-CrateNum cache for `last_source_file_index`. + // We just set it to 0, to ensure that we don't try to access something out + // of bounds for our initial 'guess' + self.last_source_file_index = 0; + + let foreign_data = self.cdata().cstore.get_crate_data(cnum); + foreign_data.imported_source_files(sess.source_map()) + }; + let source_file = { // Optimize for the case that most spans within a translated item // originate from the same source_file. @@ -413,16 +474,32 @@ impl<'a, 'tcx> SpecializedDecoder for DecodeContext<'a, 'tcx> { .binary_search_by_key(&lo, |source_file| source_file.original_start_pos) .unwrap_or_else(|index| index - 1); - self.last_source_file_index = index; + // Don't try to cache the index for foreign spans, + // as this would require a map from CrateNums to indices + if tag == TAG_VALID_SPAN_LOCAL { + self.last_source_file_index = index; + } &imported_source_files[index] } }; // Make sure our binary search above is correct. - debug_assert!(lo >= source_file.original_start_pos && lo <= source_file.original_end_pos); + debug_assert!( + lo >= source_file.original_start_pos && lo <= source_file.original_end_pos, + "Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}", + lo, + source_file.original_start_pos, + source_file.original_end_pos + ); // Make sure we correctly filtered out invalid spans during encoding - debug_assert!(hi >= source_file.original_start_pos && hi <= source_file.original_end_pos); + debug_assert!( + hi >= source_file.original_start_pos && hi <= source_file.original_end_pos, + "Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}", + hi, + source_file.original_start_pos, + source_file.original_end_pos + ); let lo = (lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos; @@ -1424,14 +1501,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { let local_version = local_source_map.new_imported_source_file( name, name_was_remapped, - self.cnum.as_u32(), src_hash, name_hash, source_length, + self.cnum, lines, multibyte_chars, non_narrow_chars, normalized_pos, + start_pos, + end_pos, ); debug!( "CrateMetaData::imported_source_files alloc \ diff --git a/src/librustc_metadata/rmeta/encoder.rs b/src/librustc_metadata/rmeta/encoder.rs index ce62f15f85d93..25c925f2e2a88 100644 --- a/src/librustc_metadata/rmeta/encoder.rs +++ b/src/librustc_metadata/rmeta/encoder.rs @@ -29,7 +29,7 @@ use rustc_ast::ast; use rustc_ast::attr; use rustc_span::source_map::Spanned; use rustc_span::symbol::{kw, sym, Ident, Symbol}; -use rustc_span::{self, FileName, SourceFile, Span}; +use rustc_span::{self, ExternalSource, FileName, SourceFile, Span}; use std::hash::Hash; use std::num::NonZeroUsize; use std::path::Path; @@ -167,20 +167,56 @@ impl<'tcx> SpecializedEncoder for EncodeContext<'tcx> { return TAG_INVALID_SPAN.encode(self); } - // HACK(eddyb) there's no way to indicate which crate a Span is coming - // from right now, so decoding would fail to find the SourceFile if - // it's not local to the crate the Span is found in. - if self.source_file_cache.is_imported() { - return TAG_INVALID_SPAN.encode(self); - } + // There are two possible cases here: + // 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the + // crate we are writing metadata for. When the metadata for *this* crate gets + // deserialized, the deserializer will need to know which crate it originally came + // from. We use `TAG_VALID_SPAN_FOREIGN` to indicate that a `CrateNum` should + // be deserialized after the rest of the span data, which tells the deserializer + // which crate contains the source map information. + // 2. This span comes from our own crate. No special hamdling is needed - we just + // write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use + // our own source map information. + let (tag, lo, hi) = if self.source_file_cache.is_imported() { + // To simplify deserialization, we 'rebase' this span onto the crate it originally came from + // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values + // are relative to the source map information for the 'foreign' crate whose CrateNum + // we write into the metadata. This allows `imported_source_files` to binary + // search through the 'foreign' crate's source map information, using the + // deserialized 'lo' and 'hi' values directly. + // + // All of this logic ensures that the final result of deserialization is a 'normal' + // Span that can be used without any additional trouble. + let external_start_pos = { + // Introduce a new scope so that we drop the 'lock()' temporary + match &*self.source_file_cache.external_src.lock() { + ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos, + src => panic!("Unexpected external source {:?}", src), + } + }; + let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos; + let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos; + + (TAG_VALID_SPAN_FOREIGN, lo, hi) + } else { + (TAG_VALID_SPAN_LOCAL, span.lo, span.hi) + }; - TAG_VALID_SPAN.encode(self)?; - span.lo.encode(self)?; + tag.encode(self)?; + lo.encode(self)?; // Encode length which is usually less than span.hi and profits more // from the variable-length integer encoding that we use. - let len = span.hi - span.lo; - len.encode(self) + let len = hi - lo; + len.encode(self)?; + + if tag == TAG_VALID_SPAN_FOREIGN { + // This needs to be two lines to avoid holding the `self.source_file_cache` + // while calling `cnum.encode(self)` + let cnum = self.source_file_cache.cnum; + cnum.encode(self)?; + } + Ok(()) // Don't encode the expansion context. } diff --git a/src/librustc_metadata/rmeta/mod.rs b/src/librustc_metadata/rmeta/mod.rs index 89e26b15d502b..379658b0e84e2 100644 --- a/src/librustc_metadata/rmeta/mod.rs +++ b/src/librustc_metadata/rmeta/mod.rs @@ -411,5 +411,6 @@ struct GeneratorData<'tcx> { } // Tags used for encoding Spans: -const TAG_VALID_SPAN: u8 = 0; -const TAG_INVALID_SPAN: u8 = 1; +const TAG_VALID_SPAN_LOCAL: u8 = 0; +const TAG_VALID_SPAN_FOREIGN: u8 = 1; +const TAG_INVALID_SPAN: u8 = 2; diff --git a/src/librustc_span/lib.rs b/src/librustc_span/lib.rs index 1d493da9e5b11..4d8311fc82036 100644 --- a/src/librustc_span/lib.rs +++ b/src/librustc_span/lib.rs @@ -24,7 +24,7 @@ pub mod hygiene; use hygiene::Transparency; pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, MacroKind, SyntaxContext}; pub mod def_id; -use def_id::DefId; +use def_id::{CrateNum, DefId, LOCAL_CRATE}; mod span_encoding; pub use span_encoding::{Span, DUMMY_SP}; @@ -836,30 +836,42 @@ pub struct NormalizedPos { pub diff: u32, } -/// The state of the lazy external source loading mechanism of a `SourceFile`. -#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Eq, Clone, Debug)] pub enum ExternalSource { + /// No external source has to be loaded, since the `SourceFile` represents a local crate. + Unneeded, + Foreign { + kind: ExternalSourceKind, + /// This SourceFile's byte-offset within the source_map of its original crate + original_start_pos: BytePos, + /// The end of this SourceFile within the source_map of its original crate + original_end_pos: BytePos, + }, +} + +/// The state of the lazy external source loading mechanism of a `SourceFile`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum ExternalSourceKind { /// The external source has been loaded already. Present(String), /// No attempt has been made to load the external source. AbsentOk, /// A failed attempt has been made to load the external source. AbsentErr, - /// No external source has to be loaded, since the `SourceFile` represents a local crate. Unneeded, } impl ExternalSource { pub fn is_absent(&self) -> bool { - match *self { - ExternalSource::Present(_) => false, + match self { + ExternalSource::Foreign { kind: ExternalSourceKind::Present(_), .. } => false, _ => true, } } pub fn get_source(&self) -> Option<&str> { - match *self { - ExternalSource::Present(ref src) => Some(src), + match self { + ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src), _ => None, } } @@ -880,8 +892,6 @@ pub struct SourceFile { /// The unmapped path of the file that the source came from. /// Set to `None` if the `SourceFile` was imported from an external crate. pub unmapped_path: Option, - /// Indicates which crate this `SourceFile` was imported from. - pub crate_of_origin: u32, /// The complete source code. pub src: Option>, /// The source code's hash. @@ -903,6 +913,8 @@ pub struct SourceFile { pub normalized_pos: Vec, /// A hash of the filename, used for speeding up hashing in incremental compilation. pub name_hash: u128, + /// Indicates which crate this `SourceFile` was imported from. + pub cnum: CrateNum, } impl Encodable for SourceFile { @@ -969,7 +981,8 @@ impl Encodable for SourceFile { s.emit_struct_field("multibyte_chars", 6, |s| self.multibyte_chars.encode(s))?; s.emit_struct_field("non_narrow_chars", 7, |s| self.non_narrow_chars.encode(s))?; s.emit_struct_field("name_hash", 8, |s| self.name_hash.encode(s))?; - s.emit_struct_field("normalized_pos", 9, |s| self.normalized_pos.encode(s)) + s.emit_struct_field("normalized_pos", 9, |s| self.normalized_pos.encode(s))?; + s.emit_struct_field("cnum", 10, |s| self.cnum.encode(s)) }) } } @@ -1019,24 +1032,24 @@ impl Decodable for SourceFile { let name_hash: u128 = d.read_struct_field("name_hash", 8, |d| Decodable::decode(d))?; let normalized_pos: Vec = d.read_struct_field("normalized_pos", 9, |d| Decodable::decode(d))?; + let cnum: CrateNum = d.read_struct_field("cnum", 10, |d| Decodable::decode(d))?; Ok(SourceFile { name, name_was_remapped, unmapped_path: None, - // `crate_of_origin` has to be set by the importer. - // This value matches up with `rustc_hir::def_id::INVALID_CRATE`. - // That constant is not available here, unfortunately. - crate_of_origin: std::u32::MAX - 1, start_pos, end_pos, src: None, src_hash, - external_src: Lock::new(ExternalSource::AbsentOk), + // Unused - the metadata decoder will construct + // a new SourceFile, filling in `external_src` properly + external_src: Lock::new(ExternalSource::Unneeded), lines, multibyte_chars, non_narrow_chars, normalized_pos, name_hash, + cnum, }) }) } @@ -1078,7 +1091,6 @@ impl SourceFile { name, name_was_remapped, unmapped_path: Some(unmapped_path), - crate_of_origin: 0, src: Some(Lrc::new(src)), src_hash, external_src: Lock::new(ExternalSource::Unneeded), @@ -1089,6 +1101,7 @@ impl SourceFile { non_narrow_chars, normalized_pos, name_hash, + cnum: LOCAL_CRATE, } } @@ -1106,21 +1119,27 @@ impl SourceFile { where F: FnOnce() -> Option, { - if *self.external_src.borrow() == ExternalSource::AbsentOk { + if matches!( + *self.external_src.borrow(), + ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. } + ) { let src = get_src(); let mut external_src = self.external_src.borrow_mut(); // Check that no-one else have provided the source while we were getting it - if *external_src == ExternalSource::AbsentOk { + if let ExternalSource::Foreign { + kind: src_kind @ ExternalSourceKind::AbsentOk, .. + } = &mut *external_src + { if let Some(src) = src { let mut hasher: StableHasher = StableHasher::new(); hasher.write(src.as_bytes()); if hasher.finish::() == self.src_hash { - *external_src = ExternalSource::Present(src); + *src_kind = ExternalSourceKind::Present(src); return true; } } else { - *external_src = ExternalSource::AbsentErr; + *src_kind = ExternalSourceKind::AbsentErr; } false diff --git a/src/librustc_span/source_map.rs b/src/librustc_span/source_map.rs index 353f7b3f52bc3..7dd9e2f6316b4 100644 --- a/src/librustc_span/source_map.rs +++ b/src/librustc_span/source_map.rs @@ -296,14 +296,16 @@ impl SourceMap { &self, filename: FileName, name_was_remapped: bool, - crate_of_origin: u32, src_hash: u128, name_hash: u128, source_len: usize, + cnum: CrateNum, mut file_local_lines: Vec, mut file_local_multibyte_chars: Vec, mut file_local_non_narrow_chars: Vec, mut file_local_normalized_pos: Vec, + original_start_pos: BytePos, + original_end_pos: BytePos, ) -> Lrc { let start_pos = self .allocate_address_space(source_len) @@ -332,10 +334,13 @@ impl SourceMap { name: filename, name_was_remapped, unmapped_path: None, - crate_of_origin, src: None, src_hash, - external_src: Lock::new(ExternalSource::AbsentOk), + external_src: Lock::new(ExternalSource::Foreign { + kind: ExternalSourceKind::AbsentOk, + original_start_pos, + original_end_pos, + }), start_pos, end_pos, lines: file_local_lines, @@ -343,6 +348,7 @@ impl SourceMap { non_narrow_chars: file_local_non_narrow_chars, normalized_pos: file_local_normalized_pos, name_hash, + cnum, }); let mut files = self.files.borrow_mut(); diff --git a/src/test/ui/span/auxiliary/transitive_dep_three.rs b/src/test/ui/span/auxiliary/transitive_dep_three.rs new file mode 100644 index 0000000000000..99b51625ac3ec --- /dev/null +++ b/src/test/ui/span/auxiliary/transitive_dep_three.rs @@ -0,0 +1,9 @@ +#[macro_export] +macro_rules! define_parse_error { + () => { + #[macro_export] + macro_rules! parse_error { + () => { parse error } + } + } +} diff --git a/src/test/ui/span/auxiliary/transitive_dep_two.rs b/src/test/ui/span/auxiliary/transitive_dep_two.rs new file mode 100644 index 0000000000000..5110c42765b6d --- /dev/null +++ b/src/test/ui/span/auxiliary/transitive_dep_two.rs @@ -0,0 +1,3 @@ +extern crate transitive_dep_three; + +transitive_dep_three::define_parse_error!(); diff --git a/src/test/ui/span/transitive-dep-span.rs b/src/test/ui/span/transitive-dep-span.rs new file mode 100644 index 0000000000000..b445d389c561a --- /dev/null +++ b/src/test/ui/span/transitive-dep-span.rs @@ -0,0 +1,13 @@ +// Tests that we properly serialize/deserialize spans from transitive dependencies +// (e.g. imported SourceFiles) +// +// The order of these next lines is important, since we need +// transitive_dep_two.rs to be able to reference transitive_dep_three.rs +// +// aux-build: transitive_dep_three.rs +// aux-build: transitive_dep_two.rs +// compile-flags: -Z macro-backtrace + +extern crate transitive_dep_two; + +transitive_dep_two::parse_error!(); //~ ERROR expected one of diff --git a/src/test/ui/span/transitive-dep-span.stderr b/src/test/ui/span/transitive-dep-span.stderr new file mode 100644 index 0000000000000..68d8911a4351c --- /dev/null +++ b/src/test/ui/span/transitive-dep-span.stderr @@ -0,0 +1,19 @@ +error: expected one of `!` or `::`, found `error` + --> $DIR/auxiliary/transitive_dep_three.rs:6:27 + | +LL | / macro_rules! parse_error { +LL | | () => { parse error } + | | ^^^^^ expected one of `!` or `::` +LL | | } + | |_________- in this expansion of `transitive_dep_two::parse_error!` + | + ::: $DIR/transitive-dep-span.rs:13:1 + | +LL | transitive_dep_two::parse_error!(); + | ----------------------------------- + | | + | in this macro invocation + | in this macro invocation + +error: aborting due to previous error +