Skip to content

Commit

Permalink
Auto merge of rust-lang#121150 - Swatinem:debug-ascii-str, r=joboet
Browse files Browse the repository at this point in the history
Add a fast-path to `Debug` ASCII `&str`

Instead of going through the `EscapeDebug` machinery, we can just skip over ASCII chars that don’t need any escaping.

---

This is an alternative / a companion to rust-lang#121138.

The other PR adds the fast path deep within `EscapeDebug`, whereas this one skips over runs of printable ASCII as early as possible, directly in `impl Debug for str`.
  • Loading branch information
bors committed May 24, 2024
2 parents 99a4928 + 05754b8 commit a365890
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 20 deletions.
4 changes: 2 additions & 2 deletions core/benches/str/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ fn ascii_escapes(b: &mut Bencher) {
assert_fmt(
s,
r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#,
21,
15,
);
b.iter(|| {
black_box(format!("{:?}", black_box(s)));
Expand Down Expand Up @@ -72,7 +72,7 @@ fn mostly_unicode(b: &mut Bencher) {
#[bench]
fn mixed(b: &mut Bencher) {
let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\".";
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 36);
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21);
b.iter(|| {
black_box(format!("{:?}", black_box(s)));
});
Expand Down
59 changes: 41 additions & 18 deletions core/src/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2402,23 +2402,47 @@ impl Display for bool {
// NOTE(review): this is a diff hunk whose +/- markers and indentation were lost in the page
// rendering, so removed and added lines are interleaved. Lines that use `from` belong to the
// removed implementation; lines that use `printable_range` / `rest` belong to the added one.
// The shared `escape_debug_ext` arguments and closing braces are unchanged context.
impl Debug for str {
// Writes the string surrounded by double quotes, escaping chars via `escape_debug_ext`.
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('"')?;
// (removed) Old implementation: ran `escape_debug_ext` on every char of the string and
// tracked in `from` the start of the not-yet-written, unescaped backlog.
let mut from = 0;
for (i, c) in self.char_indices() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
// If char needs escaping, flush backlog so far and write, else skip
if esc.len() != 1 {
f.write_str(&self[from..i])?;
for c in esc {
f.write_char(c)?;

// (added) New implementation starts here.
// substring we know is printable
// (byte range into `self` that has been scanned, needs no escaping, and is not yet written)
let mut printable_range = 0..0;

// Byte-level pre-filter: true for anything outside 0x20..=0x7E, plus `\` and `"`.
// Non-ASCII bytes only end the fast path; whether the char is really escaped is decided
// below by `escape_debug_ext`.
fn needs_escape(b: u8) -> bool {
b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
}

// the loop here first skips over runs of printable ASCII as a fast path.
// other chars (unicode, or ASCII that needs escaping) are then handled per-`char`.
// `rest` is the not-yet-scanned tail of `self`.
let mut rest = self;
while rest.len() > 0 {
let Some(non_printable_start) = rest.as_bytes().iter().position(|&b| needs_escape(b))
else {
// No byte in the tail needs a closer look: the whole tail joins the printable range.
printable_range.end += rest.len();
break;
};

// Everything before the flagged byte is printable ASCII; extend the pending range over it.
printable_range.end += non_printable_start;
// SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary
// (every byte skipped before it is in 0x20..=0x7E, i.e. a complete single-byte char).
rest = unsafe { rest.get_unchecked(non_printable_start..) };

// Handle exactly one char on the slow path, then go back to the byte scan.
let mut chars = rest.chars();
if let Some(c) = chars.next() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
// An escape of length 1 means the char is emitted unchanged, so it stays in the range.
if esc.len() != 1 {
// Flush the pending printable bytes, write the escape sequence, and restart the
// range immediately after `c` (the `end` bump below makes it empty again).
f.write_str(&self[printable_range.clone()])?;
Display::fmt(&esc, f)?;
printable_range.start = printable_range.end + c.len_utf8();
}
// (removed) old bookkeeping line:
from = i + c.len_utf8();
// (added) advance the range end past `c` in both the escaped and unescaped case.
printable_range.end += c.len_utf8();
}
// Continue scanning after the char just handled.
rest = chars.as_str();
}
// (removed) old final flush:
f.write_str(&self[from..])?;

// (added) new final flush of whatever printable bytes are still pending.
f.write_str(&self[printable_range])?;

f.write_char('"')
}
}
Expand All @@ -2434,13 +2458,12 @@ impl Display for str {
// NOTE(review): diff hunk rendered without +/- markers. The `for c in ... { f.write_char(c)? }`
// lines are the removed version; the `let esc = ...; Display::fmt(&esc, f)?;` lines are the
// added version. The three `escape_*` arguments are unchanged context.
impl Debug for char {
// Writes the char surrounded by single quotes, escaped via `escape_debug_ext`.
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('\'')?;
// (removed) old: iterate the escape sequence and write it one char at a time.
for c in self.escape_debug_ext(EscapeDebugExtArgs {
// (added) new: keep the escape value and hand it to its `Display` impl in one call.
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: true,
escape_double_quote: false,
// (removed) tail of the old `for` loop:
}) {
f.write_char(c)?
}
// (added) tail of the new version:
});
Display::fmt(&esc, f)?;
f.write_char('\'')
}
}
Expand Down

0 comments on commit a365890

Please sign in to comment.