Skip to content

Commit

Permalink
Merge pull request #40 from epage/name
Browse files Browse the repository at this point in the history
 feat: Check file names
  • Loading branch information
epage authored Jul 20, 2019
2 parents 807a4a8 + 95c0aea commit 2c7dc55
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 54 deletions.
12 changes: 12 additions & 0 deletions benches/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fn process_empty(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -38,6 +40,8 @@ fn process_no_tokens(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -58,6 +62,8 @@ fn process_single_token(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -78,6 +84,8 @@ fn process_sherlock(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -98,6 +106,8 @@ fn process_code(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -118,6 +128,8 @@ fn process_corpus(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand Down
12 changes: 6 additions & 6 deletions benches/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,34 @@ mod data;

#[bench]
fn symbol_parse_empty(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::EMPTY.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::EMPTY.as_bytes()).last());
}

#[bench]
fn symbol_parse_no_tokens(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::NO_TOKENS.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::NO_TOKENS.as_bytes()).last());
}

#[bench]
fn symbol_parse_single_token(b: &mut test::Bencher) {
b.iter(|| {
typos::tokens::Identifier::parse(data::SINGLE_TOKEN.as_bytes()).last();
typos::tokens::Identifier::parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
});
}

#[bench]
fn symbol_parse_sherlock(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::SHERLOCK.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::SHERLOCK.as_bytes()).last());
}

#[bench]
fn symbol_parse_code(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::CODE.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::CODE.as_bytes()).last());
}

#[bench]
fn symbol_parse_corpus(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::CORPUS.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::CORPUS.as_bytes()).last());
}

#[bench]
Expand Down
3 changes: 1 addition & 2 deletions docs/about.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
| Whole-project | Yes | Yes | Yes | Yes | No |
| Ignores hidden | Yes | Yes | ? | Yes | No |
| Respect gitignore | Yes | Yes | ? | No | No |
| Checks filenames | No ([#24][def-24]) | No | ? | Yes | No |
| Checks filenames | Yes | No | ? | Yes | No |
| API | Rust / [JSON Lines] | Rust | ? | Python | None |
| License | MIT or Apache | AGPL | MIT | GPLv2 | GPLv2 |

Expand All @@ -59,5 +59,4 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
[def-14]: https://github.com/epage/typos/issues/14
[def-17]: https://github.com/epage/typos/issues/17
[def-18]: https://github.com/epage/typos/issues/18
[def-24]: https://github.com/epage/typos/issues/24
[def-3]: https://github.com/epage/typos/issues/3
97 changes: 68 additions & 29 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,48 +17,87 @@ use bstr::ByteSlice;
pub fn process_file(
path: &std::path::Path,
dictionary: &Dictionary,
check_filenames: bool,
check_files: bool,
ignore_hex: bool,
binary: bool,
report: report::Report,
) -> Result<(), failure::Error> {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
if !binary && buffer.find_byte(b'\0').is_some() {
return Ok(());
if check_filenames {
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
for ident in tokens::Identifier::parse(part) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let msg = report::FilenameCorrection {
path,
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let msg = report::FilenameCorrection {
path,
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
}
}
}

for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Message {
path,
line,
line_num,
col_num,
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg);
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Message {
if check_files {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
if !binary && buffer.find_byte(b'\0').is_some() {
let msg = report::BinaryFile {
path,
non_exhaustive: (),
};
report(msg.into());
return Ok(());
}

for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse_bytes(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
typo: word.token(),
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg);
report(msg.into());
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
}
}
Expand Down
42 changes: 42 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,26 @@ struct Options {
/// Paths to check
path: Vec<std::path::PathBuf>,

#[structopt(long, raw(overrides_with = r#""check-filenames""#))]
/// Skip verifying spelling in file names.
no_check_filenames: bool,
#[structopt(
long,
raw(overrides_with = r#""no-check-filenames""#),
raw(hidden = "true")
)]
check_filenames: bool,

#[structopt(long, raw(overrides_with = r#""check-files""#))]
/// Skip verifying spelling in files.
no_check_files: bool,
#[structopt(
long,
raw(overrides_with = r#""no-check-files""#),
raw(hidden = "true")
)]
check_files: bool,

#[structopt(long, raw(overrides_with = r#""hex""#))]
/// Don't try to detect that an identifier looks like hex
no_hex: bool,
Expand Down Expand Up @@ -115,6 +135,24 @@ impl Options {
self
}

pub fn check_files(&self) -> Option<bool> {
match (self.check_files, self.no_check_files) {
(true, false) => Some(true),
(false, true) => Some(false),
(false, false) => None,
(_, _) => unreachable!("StructOpt should make this impossible"),
}
}

pub fn check_filenames(&self) -> Option<bool> {
match (self.check_filenames, self.no_check_filenames) {
(true, false) => Some(true),
(false, true) => Some(false),
(false, false) => None,
(_, _) => unreachable!("StructOpt should make this impossible"),
}
}

pub fn ignore_hex(&self) -> Option<bool> {
match (self.no_hex, self.hex) {
(true, false) => Some(false),
Expand Down Expand Up @@ -197,6 +235,8 @@ fn run() -> Result<(), failure::Error> {
let options = Options::from_args().infer();

let dictionary = typos::Dictionary::new();
let check_filenames = options.check_filenames().unwrap_or(true);
let check_files = options.check_files().unwrap_or(true);
let ignore_hex = options.ignore_hex().unwrap_or(true);
let binary = options.binary().unwrap_or(false);

Expand All @@ -222,6 +262,8 @@ fn run() -> Result<(), failure::Error> {
typos::process_file(
entry.path(),
&dictionary,
check_filenames,
check_files,
ignore_hex,
binary,
options.format.report(),
Expand Down
Loading

0 comments on commit 2c7dc55

Please sign in to comment.