Skip to content

Commit

Permalink
feat(parser): Ignore hex literals
Browse files Browse the repository at this point in the history
Trying to avoid accidentally correcting something that looks like a word
inside a hex number, like `0xBEAF`.

Fixes #19
  • Loading branch information
epage committed Jul 14, 2019
1 parent 389d5bd commit 006204e
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 3 deletions.
6 changes: 6 additions & 0 deletions benches/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ fn process_empty(b: &mut test::Bencher) {
typos::process_file(
sample_path.path(),
&corrections,
true,
typos::report::print_silent,
)
});
Expand All @@ -35,6 +36,7 @@ fn process_no_tokens(b: &mut test::Bencher) {
typos::process_file(
sample_path.path(),
&corrections,
true,
typos::report::print_silent,
)
});
Expand All @@ -53,6 +55,7 @@ fn process_single_token(b: &mut test::Bencher) {
typos::process_file(
sample_path.path(),
&corrections,
true,
typos::report::print_silent,
)
});
Expand All @@ -71,6 +74,7 @@ fn process_sherlock(b: &mut test::Bencher) {
typos::process_file(
sample_path.path(),
&corrections,
true,
typos::report::print_silent,
)
});
Expand All @@ -89,6 +93,7 @@ fn process_code(b: &mut test::Bencher) {
typos::process_file(
sample_path.path(),
&corrections,
true,
typos::report::print_silent,
)
});
Expand All @@ -107,6 +112,7 @@ fn process_corpus(b: &mut test::Bencher) {
typos::process_file(
sample_path.path(),
&corrections,
true,
typos::report::print_silent,
)
});
Expand Down
3 changes: 1 addition & 2 deletions docs/about.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
| Per-Lang Dict | No ([#14][def-14]) | No | ? | No | Yes |
| CamelCase | Yes | No | ? | No | Yes |
| snake_case | Yes | No | ? | No | Yes |
| Ignore Hex | No ([#19][def-19]) | No | ? | No | Yes |
| Ignore Hex | Yes | No | ? | No | Yes |
| C-Escapes | No ([#20][def-3]) | No | ? | No | Yes |
| Encodings | UTF-8 ([#17][def-17]) | UTF-8 | ? | Auto | Auto |
| Whole-project | Yes | Yes | Yes | Yes | No |
Expand All @@ -59,6 +59,5 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
[def-14]: https://github.com/epage/typos/issues/14
[def-17]: https://github.com/epage/typos/issues/17
[def-18]: https://github.com/epage/typos/issues/18
[def-19]: https://github.com/epage/typos/issues/19
[def-24]: https://github.com/epage/typos/issues/24
[def-3]: https://github.com/epage/typos/issues/3
12 changes: 12 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@ use std::io::Read;
pub fn process_file(
path: &std::path::Path,
dictionary: &Dictionary,
ignore_hex: bool,
report: report::Report,
) -> Result<(), failure::Error> {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Message {
Expand Down Expand Up @@ -55,3 +59,11 @@ pub fn process_file(

Ok(())
}

/// Returns `true` when `ident` is spelled like a hexadecimal literal.
///
/// A match is the prefix `0x` or `0X` followed by at least one character,
/// every one of which is an ASCII hex digit (`0-9`, `a-f`, `A-F`) or `_`.
/// The entire string must match; any other leading or trailing character
/// (including a newline) makes the result `false`.
///
/// This runs for each identifier while scanning a file, so it is a plain
/// byte scan rather than a regular expression. The accepted set is the same
/// as the pattern `^0[xX][0-9a-fA-F_]+$`.
fn is_hex(ident: &str) -> bool {
    let bytes = ident.as_bytes();
    // Need the two-byte prefix plus at least one digit/separator.
    if bytes.len() < 3 {
        return false;
    }
    if bytes[0] != b'0' || (bytes[1] != b'x' && bytes[1] != b'X') {
        return false;
    }
    // `_`: number literal separator in Rust and other languages.
    // Bytes of multi-byte UTF-8 sequences are >= 0x80 and therefore rejected.
    bytes[2..]
        .iter()
        .all(|&b| b.is_ascii_hexdigit() || b == b'_')
}
23 changes: 22 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ struct Options {
/// Paths to check
path: Vec<std::path::PathBuf>,

#[structopt(long, raw(overrides_with = r#""hex""#))]
/// Don't try to detect that an identifier looks like hex
no_hex: bool,
#[structopt(long, raw(overrides_with = r#""no-hex""#), raw(hidden = "true"))]
hex: bool,

#[structopt(
long = "format",
raw(possible_values = "&Format::variants()", case_insensitive = "true"),
Expand Down Expand Up @@ -103,6 +109,15 @@ impl Options {
self
}

/// Collapses the `no_hex` / `hex` flag pair into a single optional setting.
///
/// Returns `Some(false)` when only `no_hex` is set, `Some(true)` when only
/// `hex` is set, and `None` when neither is set so the caller can pick the
/// default.
///
/// # Panics
///
/// Panics if both flags are set at once; the two options are declared as
/// overriding each other, so that combination is not expected to occur.
pub fn ignore_hex(&self) -> Option<bool> {
    if self.no_hex && self.hex {
        unreachable!("StructOpt should make this impossible");
    }

    if self.no_hex {
        Some(false)
    } else if self.hex {
        Some(true)
    } else {
        None
    }
}

pub fn ignore_hidden(&self) -> Option<bool> {
match (self.hidden, self.no_hidden) {
(true, false) => Some(false),
Expand Down Expand Up @@ -167,6 +182,7 @@ fn run() -> Result<(), failure::Error> {
let options = Options::from_args().infer();

let dictionary = typos::Dictionary::new();
let ignore_hex = options.ignore_hex().unwrap_or(true);

let first_path = &options
.path
Expand All @@ -187,7 +203,12 @@ fn run() -> Result<(), failure::Error> {
for entry in walk.build() {
let entry = entry?;
if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
typos::process_file(entry.path(), &dictionary, options.format.report())?;
typos::process_file(
entry.path(),
&dictionary,
ignore_hex,
options.format.report(),
)?;
}
}

Expand Down

0 comments on commit 006204e

Please sign in to comment.