Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rustc_errors: use perfect hashing for character replacements #128463

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2712,6 +2712,7 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_macros",
"phf_shared 0.11.2",
]

Expand Down Expand Up @@ -2745,6 +2746,19 @@ dependencies = [
"rand",
]

[[package]]
name = "phf_macros"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
dependencies = [
"phf_generator 0.11.2",
"phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.67",
]

[[package]]
name = "phf_shared"
version = "0.10.0"
Expand Down Expand Up @@ -3653,6 +3667,7 @@ version = "0.0.0"
dependencies = [
"annotate-snippets 0.10.2",
"derive_setters",
"phf",
"rustc_ast",
"rustc_ast_pretty",
"rustc_data_structures",
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_errors/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ edition = "2021"
# tidy-alphabetical-start
annotate-snippets = "0.10"
derive_setters = "0.1.6"
phf = { version = "0.11.2", features = ["macros"] }
rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
rustc_data_structures = { path = "../rustc_data_structures" }
Expand Down
102 changes: 49 additions & 53 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2563,66 +2563,62 @@ fn num_decimal_digits(num: usize) -> usize {
}

// We replace some characters so the CLI output is always consistent and underlines aligned.
// Keep the following list in sync with `rustc_span::char_width`.
// ATTENTION: keep lexicografically sorted so that the binary search will work
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
// tidy-alphabetical-start
const OUTPUT_REPLACEMENTS: phf::Map<char, &'static str> = phf::phf_map![
// In terminals without Unicode support the following will be garbled, but in *all* terminals
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
// support" gate.
('\0', "␀"),
('\u{0001}', "␁"),
('\u{0002}', "␂"),
('\u{0003}', "␃"),
('\u{0004}', "␄"),
('\u{0005}', "␅"),
('\u{0006}', "␆"),
('\u{0007}', "␇"),
('\u{0008}', "␈"),
('\u{0009}', " "), // We do our own tab replacement
('\u{000b}', "␋"),
('\u{000c}', "␌"),
('\u{000d}', "␍"),
('\u{000e}', "␎"),
('\u{000f}', "␏"),
('\u{0010}', "␐"),
('\u{0011}', "␑"),
('\u{0012}', "␒"),
('\u{0013}', "␓"),
('\u{0014}', "␔"),
('\u{0015}', "␕"),
('\u{0016}', "␖"),
('\u{0017}', "␗"),
('\u{0018}', "␘"),
('\u{0019}', "␙"),
('\u{001a}', "␚"),
('\u{001b}', "␛"),
('\u{001c}', "␜"),
('\u{001d}', "␝"),
('\u{001e}', "␞"),
('\u{001f}', "␟"),
('\u{007f}', "␡"),
('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
('\u{202d}', "�"),
('\u{202e}', "�"),
('\u{2066}', "�"),
('\u{2067}', "�"),
('\u{2068}', "�"),
('\u{2069}', "�"),
// tidy-alphabetical-end
'\0' => "␀",
'\t' => " ", // We do our own tab replacement
'\r' => "␍",
'\u{0001}' => "␁",
'\u{0002}' => "␂",
'\u{0003}' => "␃",
'\u{0004}' => "␄",
'\u{0005}' => "␅",
'\u{0006}' => "␆",
'\u{0007}' => "␇",
'\u{0008}' => "␈",
'\u{000b}' => "␋",
'\u{000c}' => "␌",
'\u{000e}' => "␎",
'\u{000f}' => "␏",
'\u{0010}' => "␐",
'\u{0011}' => "␑",
'\u{0012}' => "␒",
'\u{0013}' => "␓",
'\u{0014}' => "␔",
'\u{0015}' => "␕",
'\u{0016}' => "␖",
'\u{0017}' => "␗",
'\u{0018}' => "␘",
'\u{0019}' => "␙",
'\u{001a}' => "␚",
'\u{001b}' => "␛",
'\u{001c}' => "␜",
'\u{001d}' => "␝",
'\u{001e}' => "␞",
'\u{001f}' => "␟",
'\u{007f}' => "␡",
'\u{200d}' => "", // Replace ZWJ for consistent terminal output of grapheme clusters.
'\u{202a}' => "�", // The following unicode text flow control characters are inconsistently
'\u{202b}' => "�", // supported across CLIs and can cause confusion due to the bytes on disk
'\u{202c}' => "�", // not corresponding to the visible source code, so we replace them always.
'\u{202d}' => "�",
'\u{202e}' => "�",
'\u{2066}' => "�",
'\u{2067}' => "�",
'\u{2068}' => "�",
'\u{2069}' => "�",
];

fn normalize_whitespace(s: &str) -> String {
// Scan the input string for a character in the ordered table above. If it's present, replace
// it with it's alternative string (it can be more than 1 char!). Otherwise, retain the input
// char. At the end, allocate all chars into a string in one operation.
// Scan the input string for a character in the replacement table above.
// If it's present, replace it with its alternative string (it can be more than 1 char!).
// Otherwise, retain the input char.
s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
_ => s.push(c),
match OUTPUT_REPLACEMENTS.get(&c) {
Some(r) => s.push_str(r),
None => s.push(c),
}
s
})
Expand Down
5 changes: 5 additions & 0 deletions src/tools/tidy/src/deps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,10 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
"parking_lot_core",
"pathdiff",
"perf-event-open-sys",
"phf",
"phf_generator",
"phf_macros",
"phf_shared",
"pin-project-lite",
"polonius-engine",
"portable-atomic", // dependency for platforms doesn't support `AtomicU64` in std
Expand Down Expand Up @@ -386,6 +390,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
"sha2",
"sharded-slab",
"shlex",
"siphasher",
"smallvec",
"snap",
"stable_deref_trait",
Expand Down
Loading