Skip to content

Commit

Permalink
refactor: Switch out the UTF-16 encoding impl
Browse files Browse the repository at this point in the history
  • Loading branch information
epage authored and phip1611 committed Dec 5, 2023
1 parent be4c546 commit dffdc33
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 72 deletions.
65 changes: 5 additions & 60 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/typos-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ derive_more = "0.99.17"
derive_setters = "0.1"
itertools = "0.10"
serde_json = "1.0"
- encoding = "0.2"
kstring = { version = "2.0.0", features = ["serde"] }
typed-arena = "2.0.2"
maplit = "1.0"
Expand All @@ -82,6 +81,7 @@ anstyle = "0.3.5"
anstream = "0.2.6"
serde_regex = "1.1.0"
regex = "1.7.3"
+ encoding_rs = "0.8.32"

[dev-dependencies]
assert_fs = "1.0"
Expand Down
37 changes: 26 additions & 11 deletions crates/typos-cli/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use bstr::ByteSlice;
- use encoding::Encoding;
use std::io::Read;
use std::io::Write;

Expand Down Expand Up @@ -473,11 +472,23 @@ fn read_file(
(buffer, content_type)
},
content_inspector::ContentType::UTF_16LE => {
- let buffer = report_result(encoding::all::UTF_16LE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
+ let mut decoded = String::new();
+ let (r, _) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
+ let decoded = match r {
+ encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
+ _ => Err("invalid UTF-16LE encoding"),
+ };
+ let buffer = report_result(decoded, reporter)?;
(buffer.into_bytes(), content_type)
}
content_inspector::ContentType::UTF_16BE => {
- let buffer = report_result(encoding::all::UTF_16BE.decode(&buffer, encoding::DecoderTrap::Strict), reporter)?;
+ let mut decoded = String::new();
+ let (r, _) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
+ let decoded = match r {
+ encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
+ _ => Err("invalid UTF-16BE encoding"),
+ };
+ let buffer = report_result(decoded, reporter)?;
(buffer.into_bytes(), content_type)
},
};
Expand Down Expand Up @@ -505,21 +516,25 @@ fn write_file(
// Error occurred, don't clear out the file
return Ok(());
}
- report_result(
- encoding::all::UTF_16LE.encode(&buffer, encoding::EncoderTrap::Strict),
- reporter,
- )?
+ let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
+ assert!(
+ !replaced,
+ "Coming from UTF-8, UTF-16LE shouldn't do replacements"
+ );
+ encoded.into_owned()
}
content_inspector::ContentType::UTF_16BE => {
let buffer = report_result(String::from_utf8(buffer), reporter)?;
if buffer.is_empty() {
// Error occurred, don't clear out the file
return Ok(());
}
- report_result(
- encoding::all::UTF_16BE.encode(&buffer, encoding::EncoderTrap::Strict),
- reporter,
- )?
+ let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
+ assert!(
+ !replaced,
+ "Coming from UTF-8, UTF-16BE shouldn't do replacements"
+ );
+ encoded.into_owned()
}
};

Expand Down

0 comments on commit dffdc33

Please sign in to comment.