Skip to content

Commit

Permalink
feat: Check and replace UTF-16 files
Browse files Browse the repository at this point in the history
We don't have good detection for non-UTF encodings and don't have
encoding support for UTF-32, so this change is limited to UTF-16.

Fixes #17
  • Loading branch information
Ed Page committed Nov 4, 2020
1 parent b8d35c3 commit 9d005be
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 12 deletions.
65 changes: 65 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/typos/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ unicode-segmentation = "1.6.0"
derive_more = "0.99.11"
derive_setters = "0.1"
content_inspector = "0.2.4"
encoding = "0.2"
38 changes: 26 additions & 12 deletions crates/typos/src/checks.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use bstr::ByteSlice;
use encoding::Encoding;

use crate::report;
use crate::tokens;
Expand Down Expand Up @@ -190,18 +191,31 @@ impl ParseWords {

let buffer = std::fs::read(path)
.map_err(|e| crate::ErrorKind::IoError.into_error().with_source(e))?;
if !explicit && !self.binary {
let content_type = content_inspector::inspect(&buffer);
// HACK: We only support UTF-8 at the moment
if content_type.is_binary()
|| (content_type != content_inspector::ContentType::UTF_8_BOM
&& content_type != content_inspector::ContentType::UTF_8)
{
let msg = report::BinaryFile { path };
reporter.report(msg.into());
return Ok(typos_found);
}
}
let content_type = content_inspector::inspect(&buffer);

let buffer = match content_type {
content_inspector::ContentType::BINARY |
// HACK: We don't support UTF-32 yet
content_inspector::ContentType::UTF_32LE |
content_inspector::ContentType::UTF_32BE
=> {
if !explicit && !self.binary {
let msg = report::BinaryFile { path };
reporter.report(msg.into());
return Ok(typos_found);
} else {
buffer
}
},
content_inspector::ContentType::UTF_8 |
content_inspector::ContentType::UTF_8_BOM
=> { buffer
},
content_inspector::ContentType::UTF_16LE |
content_inspector::ContentType::UTF_16BE => {
buffer
},
};

for line in buffer.lines() {
let msg = report::Parse {
Expand Down

0 comments on commit 9d005be

Please sign in to comment.