Skip to content

Commit

Permalink
Merge pull request #40 from epage/name
Browse files Browse the repository at this point in the history
 feat: Check file names
  • Loading branch information
epage authored Jul 20, 2019
2 parents 807a4a8 + 95c0aea commit 2c7dc55
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 54 deletions.
12 changes: 12 additions & 0 deletions benches/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fn process_empty(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -38,6 +40,8 @@ fn process_no_tokens(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -58,6 +62,8 @@ fn process_single_token(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -78,6 +84,8 @@ fn process_sherlock(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -98,6 +106,8 @@ fn process_code(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand All @@ -118,6 +128,8 @@ fn process_corpus(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
true,
true,
false,
typos::report::print_silent,
)
Expand Down
12 changes: 6 additions & 6 deletions benches/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,34 @@ mod data;

#[bench]
fn symbol_parse_empty(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::EMPTY.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::EMPTY.as_bytes()).last());
}

#[bench]
fn symbol_parse_no_tokens(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::NO_TOKENS.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::NO_TOKENS.as_bytes()).last());
}

#[bench]
fn symbol_parse_single_token(b: &mut test::Bencher) {
b.iter(|| {
typos::tokens::Identifier::parse(data::SINGLE_TOKEN.as_bytes()).last();
typos::tokens::Identifier::parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
});
}

#[bench]
fn symbol_parse_sherlock(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::SHERLOCK.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::SHERLOCK.as_bytes()).last());
}

#[bench]
fn symbol_parse_code(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::CODE.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::CODE.as_bytes()).last());
}

#[bench]
fn symbol_parse_corpus(b: &mut test::Bencher) {
b.iter(|| typos::tokens::Identifier::parse(data::CORPUS.as_bytes()).last());
b.iter(|| typos::tokens::Identifier::parse_bytes(data::CORPUS.as_bytes()).last());
}

#[bench]
Expand Down
3 changes: 1 addition & 2 deletions docs/about.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
| Whole-project | Yes | Yes | Yes | Yes | No |
| Ignores hidden | Yes | Yes | ? | Yes | No |
| Respect gitignore | Yes | Yes | ? | No | No |
| Checks filenames | No ([#24][def-24]) | No | ? | Yes | No |
| Checks filenames | Yes | No | ? | Yes | No |
| API | Rust / [JSON Lines] | Rust | ? | Python | None |
| License | MIT or Apache | AGPL | MIT | GPLv2 | GPLv2 |

Expand All @@ -59,5 +59,4 @@ Whitelist: A confidence rating is given for how close a word is to one in the wh
[def-14]: https://github.com/epage/typos/issues/14
[def-17]: https://github.com/epage/typos/issues/17
[def-18]: https://github.com/epage/typos/issues/18
[def-24]: https://github.com/epage/typos/issues/24
[def-3]: https://github.com/epage/typos/issues/3
97 changes: 68 additions & 29 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,48 +17,87 @@ use bstr::ByteSlice;
pub fn process_file(
path: &std::path::Path,
dictionary: &Dictionary,
check_filenames: bool,
check_files: bool,
ignore_hex: bool,
binary: bool,
report: report::Report,
) -> Result<(), failure::Error> {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
if !binary && buffer.find_byte(b'\0').is_some() {
return Ok(());
if check_filenames {
for part in path.components().filter_map(|c| c.as_os_str().to_str()) {
for ident in tokens::Identifier::parse(part) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let msg = report::FilenameCorrection {
path,
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let msg = report::FilenameCorrection {
path,
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
}
}
}

for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Message {
path,
line,
line_num,
col_num,
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg);
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Message {
if check_files {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
if !binary && buffer.find_byte(b'\0').is_some() {
let msg = report::BinaryFile {
path,
non_exhaustive: (),
};
report(msg.into());
return Ok(());
}

for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse_bytes(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
typo: word.token(),
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg);
report(msg.into());
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
}
}
Expand Down
42 changes: 42 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,26 @@ struct Options {
/// Paths to check
path: Vec<std::path::PathBuf>,

#[structopt(long, raw(overrides_with = r#""check-filenames""#))]
/// Skip verifying spelling in file names.
no_check_filenames: bool,
#[structopt(
long,
raw(overrides_with = r#""no-check-filenames""#),
raw(hidden = "true")
)]
check_filenames: bool,

#[structopt(long, raw(overrides_with = r#""check-files""#))]
/// Skip verifying spelling in files.
no_check_files: bool,
#[structopt(
long,
raw(overrides_with = r#""no-check-files""#),
raw(hidden = "true")
)]
check_files: bool,

#[structopt(long, raw(overrides_with = r#""hex""#))]
/// Don't try to detect that an identifier looks like hex
no_hex: bool,
Expand Down Expand Up @@ -115,6 +135,24 @@ impl Options {
self
}

pub fn check_files(&self) -> Option<bool> {
match (self.check_files, self.no_check_files) {
(true, false) => Some(true),
(false, true) => Some(false),
(false, false) => None,
(_, _) => unreachable!("StructOpt should make this impossible"),
}
}

pub fn check_filenames(&self) -> Option<bool> {
match (self.check_filenames, self.no_check_filenames) {
(true, false) => Some(true),
(false, true) => Some(false),
(false, false) => None,
(_, _) => unreachable!("StructOpt should make this impossible"),
}
}

pub fn ignore_hex(&self) -> Option<bool> {
match (self.no_hex, self.hex) {
(true, false) => Some(false),
Expand Down Expand Up @@ -197,6 +235,8 @@ fn run() -> Result<(), failure::Error> {
let options = Options::from_args().infer();

let dictionary = typos::Dictionary::new();
let check_filenames = options.check_filenames().unwrap_or(true);
let check_files = options.check_files().unwrap_or(true);
let ignore_hex = options.ignore_hex().unwrap_or(true);
let binary = options.binary().unwrap_or(false);

Expand All @@ -222,6 +262,8 @@ fn run() -> Result<(), failure::Error> {
typos::process_file(
entry.path(),
&dictionary,
check_filenames,
check_files,
ignore_hex,
binary,
options.format.report(),
Expand Down
Loading

0 comments on commit 2c7dc55

Please sign in to comment.