diff --git a/crates/typos/src/check.rs b/crates/typos/src/check.rs new file mode 100644 index 000000000..00d966bff --- /dev/null +++ b/crates/typos/src/check.rs @@ -0,0 +1,100 @@ +use crate::tokens; +use crate::Dictionary; +use std::borrow::Cow; + +pub fn check_str<'b, 's: 'b>( + buffer: &'b str, + tokenizer: &'s tokens::Tokenizer, + dictionary: &'s dyn Dictionary, +) -> impl Iterator> { + tokenizer + .parse_str(buffer) + .flat_map(move |ident| process_ident(ident, dictionary)) +} + +pub fn check_bytes<'b, 's: 'b>( + buffer: &'b [u8], + tokenizer: &'s tokens::Tokenizer, + dictionary: &'s dyn Dictionary, +) -> impl Iterator> { + tokenizer + .parse_bytes(buffer) + .flat_map(move |ident| process_ident(ident, dictionary)) +} + +fn process_ident<'i, 's: 'i>( + ident: tokens::Identifier<'i>, + dictionary: &'s dyn Dictionary, +) -> impl Iterator> { + match dictionary.correct_ident(ident) { + Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()), + Some(corrections) => { + let typo = Typo { + byte_offset: ident.offset(), + typo: ident.token().into(), + corrections, + }; + itertools::Either::Left(Some(typo).into_iter()) + } + None => itertools::Either::Right( + ident + .split() + .filter_map(move |word| process_word(word, dictionary)), + ), + } +} + +fn process_word<'w, 's: 'w>( + word: tokens::Word<'w>, + dictionary: &'s dyn Dictionary, +) -> Option> { + match dictionary.correct_word(word) { + Some(crate::Status::Valid) => None, + Some(corrections) => { + let typo = Typo { + byte_offset: word.offset(), + typo: word.token().into(), + corrections, + }; + Some(typo) + } + None => None, + } +} + +/// An invalid term found in the buffer. +#[derive(Clone, Debug)] +#[non_exhaustive] +pub struct Typo<'m> { + pub byte_offset: usize, + pub typo: Cow<'m, str>, + pub corrections: crate::Status<'m>, +} + +impl<'m> Typo<'m> { + pub fn into_owned(self) -> Typo<'static> { + Typo { + byte_offset: self.byte_offset, + typo: Cow::Owned(self.typo.into_owned()), + corrections: self.corrections.into_owned(), + } + } + + pub fn borrow(&self) -> Typo<'_> { + Typo { + byte_offset: self.byte_offset, + typo: Cow::Borrowed(self.typo.as_ref()), + corrections: self.corrections.borrow(), + } + } +} + +impl<'m> Default for Typo<'m> { + fn default() -> Self { + Self { + byte_offset: 0, + typo: "".into(), + corrections: crate::Status::Invalid, + } + } +} diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 971ca86d4..a5bc5e69c 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -13,21 +13,6 @@ pub trait Dictionary: Send + Sync { fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option>; } -pub(crate) struct NullDictionary; - -impl Dictionary for NullDictionary { - fn correct_ident<'s, 'w>( - &'s self, - _ident: crate::tokens::Identifier<'w>, - ) -> Option> { - None - } - - fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option> { - None - } -} - /// Validity of a term in a Dictionary. #[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/typos/src/lib.rs b/crates/typos/src/lib.rs index 93ba77d62..5b2a3d525 100644 --- a/crates/typos/src/lib.rs +++ b/crates/typos/src/lib.rs @@ -1,7 +1,7 @@ +mod check; mod dict; -mod parser; pub mod tokens; +pub use check::*; pub use dict::*; -pub use parser::*; diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs deleted file mode 100644 index 883a730ba..000000000 --- a/crates/typos/src/parser.rs +++ /dev/null @@ -1,147 +0,0 @@ -use crate::tokens; -use crate::Dictionary; -use std::borrow::Cow; - -/// Extract typos from the buffer. -#[derive(Clone)] -pub struct ParserBuilder<'p, 'd> { - tokenizer: Option<&'p tokens::Tokenizer>, - dictionary: &'d dyn Dictionary, -} - -impl<'p> ParserBuilder<'p, 'static> { - pub fn new() -> Self { - Default::default() - } -} - -impl<'p, 'd> ParserBuilder<'p, 'd> { - /// Set the Tokenizer used when parsing. - pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self { - self.tokenizer = Some(tokenizer); - self - } - - /// Set the dictionary used when parsing. - pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> { - ParserBuilder { - tokenizer: self.tokenizer, - dictionary, - } - } - - /// Extract typos from the buffer. - pub fn build(&self) -> TyposParser<'p, 'd> { - TyposParser { - tokenizer: self.tokenizer.unwrap_or(&DEFAULT_TOKENIZER), - dictionary: self.dictionary, - } - } -} - -impl<'p> Default for ParserBuilder<'p, 'static> { - fn default() -> Self { - Self { - tokenizer: None, - dictionary: &crate::NullDictionary, - } - } -} - -static DEFAULT_TOKENIZER: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(tokens::Tokenizer::new); - -/// Extract typos from the buffer. -#[derive(Clone)] -pub struct TyposParser<'p, 'd> { - tokenizer: &'p tokens::Tokenizer, - dictionary: &'d dyn Dictionary, -} - -impl<'p, 'd> TyposParser<'p, 'd> { - pub fn parse_str<'b, 's: 'b>(&'s self, buffer: &'b str) -> impl Iterator> { - self.tokenizer - .parse_str(buffer) - .flat_map(move |ident| self.process_ident(ident)) - } - - pub fn parse_bytes<'b, 's: 'b>(&'s self, buffer: &'b [u8]) -> impl Iterator> { - self.tokenizer - .parse_bytes(buffer) - .flat_map(move |ident| self.process_ident(ident)) - } - - fn process_ident<'i, 's: 'i>( - &'s self, - ident: tokens::Identifier<'i>, - ) -> impl Iterator> { - match self.dictionary.correct_ident(ident) { - Some(crate::Status::Valid) => itertools::Either::Left(None.into_iter()), - Some(corrections) => { - let typo = Typo { - byte_offset: ident.offset(), - typo: ident.token().into(), - corrections, - }; - itertools::Either::Left(Some(typo).into_iter()) - } - None => itertools::Either::Right( - ident - .split() - .filter_map(move |word| self.process_word(word)), - ), - } - } - - fn process_word<'w, 's: 'w>(&'s self, word: tokens::Word<'w>) -> Option> { - match self.dictionary.correct_word(word) { - Some(crate::Status::Valid) => None, - Some(corrections) => { - let typo = Typo { - byte_offset: word.offset(), - typo: word.token().into(), - corrections, - }; - Some(typo) - } - None => None, - } - } -} - -/// An invalid term found in the buffer. -#[derive(Clone, Debug)] -#[non_exhaustive] -pub struct Typo<'m> { - pub byte_offset: usize, - pub typo: Cow<'m, str>, - pub corrections: crate::Status<'m>, -} - -impl<'m> Typo<'m> { - pub fn into_owned(self) -> Typo<'static> { - Typo { - byte_offset: self.byte_offset, - typo: Cow::Owned(self.typo.into_owned()), - corrections: self.corrections.into_owned(), - } - } - - pub fn borrow(&self) -> Typo<'_> { - Typo { - byte_offset: self.byte_offset, - typo: Cow::Borrowed(self.typo.as_ref()), - corrections: self.corrections.borrow(), - } - } -} - -impl<'m> Default for Typo<'m> { - fn default() -> Self { - Self { - byte_offset: 0, - typo: "".into(), - corrections: crate::Status::Invalid, - } - } -} diff --git a/src/checks.rs b/src/checks.rs index b8174c1f4..87dbbefee 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -70,14 +70,9 @@ impl FileChecker for Typos { dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .build(); - if settings.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { - for typo in parser.parse_str(file_name) { + for typo in typos::check_str(file_name, tokenizer, dictionary) { let msg = report::Typo { context: Some(report::PathContext { path }.into()), buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()), @@ -97,7 +92,7 @@ impl FileChecker for Typos { reporter.report(msg.into())?; } else { let mut accum_line_num = AccumulateLineNum::new(); - for typo in parser.parse_bytes(&buffer) { + for typo in typos::check_bytes(&buffer, tokenizer, dictionary) { let line_num = accum_line_num.line_num(&buffer, typo.byte_offset); let (line, line_offset) = extract_line(&buffer, typo.byte_offset); let msg = report::Typo { @@ -129,11 +124,6 @@ impl FileChecker for FixTypos { dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .build(); - if settings.check_files { let (buffer, content_type) = read_file(path, reporter)?; if !explicit && !settings.binary && content_type.is_binary() { @@ -142,7 +132,7 @@ impl FileChecker for FixTypos { } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in parser.parse_bytes(&buffer) { + for typo in typos::check_bytes(&buffer, tokenizer, dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -169,7 +159,7 @@ impl FileChecker for FixTypos { if settings.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in parser.parse_str(file_name) { + for typo in typos::check_str(file_name, tokenizer, dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -211,11 +201,6 @@ impl FileChecker for DiffTypos { dictionary: &dyn Dictionary, reporter: &dyn report::Report, ) -> Result<(), std::io::Error> { - let parser = typos::ParserBuilder::new() - .tokenizer(tokenizer) - .dictionary(dictionary) - .build(); - let mut content = Vec::new(); let mut new_content = Vec::new(); if settings.check_files { @@ -226,7 +211,7 @@ impl FileChecker for DiffTypos { } else { let mut fixes = Vec::new(); let mut accum_line_num = AccumulateLineNum::new(); - for typo in parser.parse_bytes(&buffer) { + for typo in typos::check_bytes(&buffer, tokenizer, dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else { @@ -254,7 +239,7 @@ impl FileChecker for DiffTypos { if settings.check_filenames { if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) { let mut fixes = Vec::new(); - for typo in parser.parse_str(file_name) { + for typo in typos::check_str(file_name, tokenizer, dictionary) { if is_fixable(&typo) { fixes.push(typo.into_owned()); } else {