Skip to content

Commit

Permalink
refactor(typos): Decouple parsing from checks
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Page committed Jan 2, 2021
1 parent 1e64080 commit e741f96
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 111 deletions.
170 changes: 64 additions & 106 deletions crates/typos/src/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use bstr::ByteSlice;
use crate::report;
use crate::tokens;
use crate::Dictionary;
use crate::Status;

pub trait Check: Send + Sync {
fn check_str(
Expand Down Expand Up @@ -172,91 +171,48 @@ impl Check for Typos {
fn check_str(
&self,
buffer: &str,
parser: &tokens::Tokenizer,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
for ident in parser.parse_str(buffer) {
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset();
let msg = report::Typo {
context: None,
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
byte_offset,
typo: ident.token(),
corrections,
};
reporter.report(msg.into())?;
}
None => {
for word in ident.split() {
match dictionary.correct_word(word) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = word.offset();
let msg = report::Typo {
context: None,
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
byte_offset,
typo: word.token(),
corrections,
};
reporter.report(msg.into())?;
}
None => {}
}
}
}
}
let parser = crate::ParserBuilder::new()
.tokenizer(tokenizer)
.dictionary(dictionary)
.typos();
for typo in parser.parse_str(buffer) {
let msg = report::Typo {
context: None,
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
byte_offset: typo.byte_offset,
typo: typo.typo,
corrections: typo.corrections,
};
reporter.report(msg.into())?;
}
Ok(())
}

fn check_bytes(
&self,
buffer: &[u8],
parser: &tokens::Tokenizer,
tokenizer: &tokens::Tokenizer,
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
for ident in parser.parse_bytes(buffer) {
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset();
let msg = report::Typo {
context: None,
buffer: std::borrow::Cow::Borrowed(buffer),
byte_offset,
typo: ident.token(),
corrections,
};
reporter.report(msg.into())?;
}
None => {
for word in ident.split() {
match dictionary.correct_word(word) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = word.offset();
let msg = report::Typo {
context: None,
buffer: std::borrow::Cow::Borrowed(buffer),
byte_offset,
typo: word.token(),
corrections,
};
reporter.report(msg.into())?;
}
None => {}
}
}
}
}
let parser = crate::ParserBuilder::new()
.tokenizer(tokenizer)
.dictionary(dictionary)
.typos();
for typo in parser.parse_bytes(buffer) {
let msg = report::Typo {
context: None,
buffer: std::borrow::Cow::Borrowed(buffer.as_bytes()),
byte_offset: typo.byte_offset,
typo: typo.typo,
corrections: typo.corrections,
};
reporter.report(msg.into())?;
}

Ok(())
}

Expand Down Expand Up @@ -284,16 +240,19 @@ impl Check for ParseIdentifiers {
fn check_str(
&self,
buffer: &str,
parser: &tokens::Tokenizer,
tokenizer: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Identifier,
data: parser.parse_str(buffer).map(|i| i.token()).collect(),
};
if !msg.data.is_empty() {
let parser = crate::ParserBuilder::new()
.tokenizer(tokenizer)
.identifiers();
for word in parser.parse_str(buffer) {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Word,
data: word.token(),
};
reporter.report(msg.into())?;
}

Expand All @@ -303,16 +262,19 @@ impl Check for ParseIdentifiers {
fn check_bytes(
&self,
buffer: &[u8],
parser: &tokens::Tokenizer,
tokenizer: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Identifier,
data: parser.parse_bytes(buffer).map(|i| i.token()).collect(),
};
if !msg.data.is_empty() {
let parser = crate::ParserBuilder::new()
.tokenizer(tokenizer)
.identifiers();
for word in parser.parse_bytes(buffer) {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Word,
data: word.token(),
};
reporter.report(msg.into())?;
}

Expand Down Expand Up @@ -343,19 +305,17 @@ impl Check for ParseWords {
fn check_str(
&self,
buffer: &str,
parser: &tokens::Tokenizer,
tokenizer: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Word,
data: parser
.parse_str(buffer)
.flat_map(|ident| ident.split().map(|i| i.token()))
.collect(),
};
if !msg.data.is_empty() {
let word_parser = crate::ParserBuilder::new().tokenizer(tokenizer).words();
for word in word_parser.parse_str(buffer) {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Word,
data: word.token(),
};
reporter.report(msg.into())?;
}

Expand All @@ -365,19 +325,17 @@ impl Check for ParseWords {
fn check_bytes(
&self,
buffer: &[u8],
parser: &tokens::Tokenizer,
tokenizer: &tokens::Tokenizer,
_dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<(), std::io::Error> {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Word,
data: parser
.parse_bytes(buffer)
.flat_map(|ident| ident.split().map(|i| i.token()))
.collect(),
};
if !msg.data.is_empty() {
let parser = crate::ParserBuilder::new().tokenizer(tokenizer).words();
for word in parser.parse_bytes(buffer) {
let msg = report::Parse {
context: None,
kind: report::ParseKind::Word,
data: word.token(),
};
reporter.report(msg.into())?;
}

Expand Down
15 changes: 15 additions & 0 deletions crates/typos/src/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,18 @@ pub trait Dictionary: Send + Sync {

fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}

/// A [`Dictionary`] that never has an opinion: every lookup returns `None`.
///
/// Crate-internal default for parsers built without a real dictionary, so
/// identifiers and words pass through without validation or corrections.
pub(crate) struct NullDictionary;

impl Dictionary for NullDictionary {
    /// Always `None`: no identifier is flagged or validated.
    fn correct_ident<'s, 'w>(
        &'s self,
        _ident: crate::tokens::Identifier<'w>,
    ) -> Option<Status<'s>> {
        None
    }

    /// Always `None`: no word is flagged or validated.
    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
        None
    }
}
4 changes: 3 additions & 1 deletion crates/typos/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Crate root: private modules with their public items re-exported flat.
mod dict;
mod parser;

pub mod checks;
pub mod report;
pub mod tokens;

// Single glob re-export per module; the old `crate::dict` form duplicated
// the plain `dict` path and is dropped.
pub use dict::*;
pub use parser::*;
Loading

0 comments on commit e741f96

Please sign in to comment.