refactor(typos): Open up the name Parser
Ed Page committed Jan 2, 2021
1 parent 7fdd0de commit 1e64080
Showing 6 changed files with 48 additions and 48 deletions.
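
In short: the tokenizer type gets a name that says what it does. As the hunks below show, call sites change only in the type path; a minimal before/after sketch, using only constructions that appear in this diff:

    // Before this commit:
    let parser = typos::tokens::Parser::new();
    // After it, the same construction under the new name:
    let parser = typos::tokens::Tokenizer::new();
    // ParserBuilder is likewise renamed to TokenizerBuilder.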
10 changes: 5 additions & 5 deletions benches/checks.rs

@@ -9,7 +9,7 @@ use typos::checks::Check;
 
 fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_identifier_parser();
     b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
 }
@@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) {
 
 fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_identifier_parser();
     b.iter(|| {
         checks.check_bytes(
@@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
 
 fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_word_parser();
     b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
 }
@@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) {
 
 fn bench_typos(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_typos();
     b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
 }
@@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
     sample_path.write_str(data).unwrap();
 
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_typos();
     b.iter(|| {
         checks.check_file(
12 changes: 6 additions & 6 deletions benches/tokenize.rs

@@ -6,39 +6,39 @@ mod data;
 
 #[bench]
 fn ident_parse_empty(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_no_tokens(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_single_token(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| {
         parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
     });
 }
 
 #[bench]
 fn ident_parse_sherlock(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_code(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_corpus(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
 }
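
As the benches above show, the renamed type keeps the same parsing surface. A minimal usage sketch, assuming only the calls visible in this diff (`Tokenizer::new()`, `parse_str`, `parse_bytes`):

    let tokenizer = typos::tokens::Tokenizer::new();
    // Identifiers from a &str; parse_str returns an Iterator<Item = Identifier>.
    let n = tokenizer.parse_str("Hello World").count();
    // The same over raw bytes; the benches drain this iterator with `.last()`.
    tokenizer.parse_bytes(b"Hello World").last();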
28 changes: 14 additions & 14 deletions crates/typos/src/checks.rs

@@ -9,15 +9,15 @@ pub trait Check: Send + Sync {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error>;
 
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error>;
@@ -31,7 +31,7 @@ pub trait Check: Send + Sync {
     fn check_filename(
         &self,
         path: &std::path::Path,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -54,7 +54,7 @@ pub trait Check: Send + Sync {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -172,7 +172,7 @@ impl Check for Typos {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -217,7 +217,7 @@ impl Check for Typos {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -284,7 +284,7 @@ impl Check for ParseIdentifiers {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -303,7 +303,7 @@ impl Check for ParseIdentifiers {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -343,7 +343,7 @@ impl Check for ParseWords {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -365,7 +365,7 @@ impl Check for ParseWords {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -404,7 +404,7 @@ impl Check for Files {
     fn check_str(
         &self,
         _buffer: &str,
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         _reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -414,7 +414,7 @@ impl Check for Files {
     fn check_bytes(
         &self,
         _buffer: &[u8],
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         _reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -436,7 +436,7 @@ impl Check for Files {
     fn check_filename(
         &self,
         _path: &std::path::Path,
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         _reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -447,7 +447,7 @@
         &self,
         path: &std::path::Path,
         _explicit: bool,
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
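
For downstream implementors of `Check`, only the parameter type changes. A hedged caller sketch — `run_check` is a hypothetical helper, not part of this commit, and the paths (`tokens`, `Dictionary`, `report`) follow the in-file imports used by checks.rs above:

    // Hypothetical helper: drive one Check over an in-memory buffer.
    fn run_check(
        check: &dyn Check,
        buffer: &str,
        tokenizer: &tokens::Tokenizer, // previously &tokens::Parser
        dictionary: &dyn Dictionary,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error> {
        check.check_str(buffer, tokenizer, dictionary, reporter)
    }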
38 changes: 19 additions & 19 deletions crates/typos/src/tokens.rs

@@ -1,13 +1,13 @@
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct ParserBuilder {
+pub struct TokenizerBuilder {
     ignore_hex: bool,
     leading_digits: bool,
     leading_chars: String,
     include_digits: bool,
     include_chars: String,
 }
 
-impl ParserBuilder {
+impl TokenizerBuilder {
     pub fn new() -> Self {
         Default::default()
     }
@@ -37,7 +37,7 @@ impl ParserBuilder {
         self
     }
 
-    pub fn build(&self) -> Parser {
+    pub fn build(&self) -> Tokenizer {
         let mut pattern = r#"\b("#.to_owned();
         Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars);
         Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars);
@@ -46,7 +46,7 @@ impl ParserBuilder {
         let words_str = regex::Regex::new(&pattern).unwrap();
         let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
 
-        Parser {
+        Tokenizer {
             words_str,
             words_bytes,
             // `leading_digits` let's us bypass the regexes since you can't have a decimal or
@@ -69,7 +69,7 @@ impl ParserBuilder {
         }
     }
 }
 
-impl Default for ParserBuilder {
+impl Default for TokenizerBuilder {
     fn default() -> Self {
         Self {
             ignore_hex: true,
@@ -82,16 +82,16 @@ impl Default for ParserBuilder {
 }
 
 #[derive(Debug, Clone)]
-pub struct Parser {
+pub struct Tokenizer {
     words_str: regex::Regex,
     words_bytes: regex::bytes::Regex,
     ignore_numbers: bool,
     ignore_hex: bool,
 }
 
-impl Parser {
+impl Tokenizer {
     pub fn new() -> Self {
-        ParserBuilder::default().build()
+        TokenizerBuilder::default().build()
     }
 
     pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
@@ -124,7 +124,7 @@ impl Parser {
     }
 }
 
-impl Default for Parser {
+impl Default for Tokenizer {
     fn default() -> Self {
         Self::new()
     }
@@ -387,7 +387,7 @@ mod test {
 
     #[test]
     fn tokenize_empty_is_empty() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "";
         let expected: Vec<Identifier> = vec![];
@@ -399,7 +399,7 @@ mod test {
 
     #[test]
     fn tokenize_word_is_word() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "word";
         let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
@@ -411,7 +411,7 @@ mod test {
 
     #[test]
     fn tokenize_space_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A B";
         let expected: Vec<Identifier> = vec![
@@ -426,7 +426,7 @@ mod test {
 
     #[test]
     fn tokenize_dot_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A.B";
         let expected: Vec<Identifier> = vec![
@@ -441,7 +441,7 @@ mod test {
 
     #[test]
     fn tokenize_namespace_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A::B";
         let expected: Vec<Identifier> = vec![
@@ -456,7 +456,7 @@ mod test {
 
     #[test]
     fn tokenize_underscore_doesnt_separate() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A_B";
         let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
@@ -468,7 +468,7 @@ mod test {
 
     #[test]
     fn tokenize_ignore_hex_enabled() {
-        let parser = ParserBuilder::new().ignore_hex(true).build();
+        let parser = TokenizerBuilder::new().ignore_hex(true).build();
 
         let input = "Hello 0xDEADBEEF World";
         let expected: Vec<Identifier> = vec![
@@ -483,7 +483,7 @@ mod test {
 
     #[test]
     fn tokenize_ignore_hex_disabled() {
-        let parser = ParserBuilder::new()
+        let parser = TokenizerBuilder::new()
             .ignore_hex(false)
             .leading_digits(true)
             .build();
@@ -523,11 +523,11 @@ mod test {
             &[("A", Case::Scream, 0), ("String", Case::Title, 1)],
         ),
         (
-            "SimpleXMLParser",
+            "SimpleXMLTokenizer",
             &[
                 ("Simple", Case::Title, 0),
                 ("XML", Case::Scream, 6),
-                ("Parser", Case::Title, 9),
+                ("Tokenizer", Case::Title, 9),
             ],
         ),
         (
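
The tests above double as usage documentation for the renamed builder. A minimal sketch, assuming nothing beyond what the hunks show:

    // Default tokenizer, equivalent to TokenizerBuilder::default().build().
    let tokenizer = Tokenizer::new();
    // "A::B" splits on the namespace separator, per tokenize_namespace_separated_words.
    assert_eq!(2, tokenizer.parse_str("A::B").count());

    // Builder form, mirroring tokenize_ignore_hex_disabled above.
    let tokenizer = TokenizerBuilder::new()
        .ignore_hex(false)
        .leading_digits(true)
        .build();
    let idents: Vec<_> = tokenizer.parse_str("Hello 0xDEADBEEF World").collect();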