Skip to content

Commit

Permalink
refactor(config)!: Detect when no dict config
Browse files Browse the repository at this point in the history
In preparing for smarter handling of config, we need to be able to tell
what is present and what isn't.

BREAKING CHANGE: `--hex` and `--no-hex` were removed; their value didn't seem high enough to justify keeping them.
  • Loading branch information
Ed Page committed Mar 30, 2021
1 parent 8bcacf3 commit 4bbc59f
Show file tree
Hide file tree
Showing 6 changed files with 203 additions and 119 deletions.
4 changes: 2 additions & 2 deletions benches/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use typos_cli::file::FileChecker;

fn bench_checks(c: &mut Criterion) {
let dictionary = typos_cli::dict::BuiltIn::new(Default::default());
let dict = typos_cli::dict::BuiltIn::new(Default::default());
let tokenizer = typos::tokens::Tokenizer::new();
let policy = typos_cli::policy::Policy::new()
.dictionary(&dictionary)
.dict(&dict)
.tokenizer(&tokenizer);

let mut group = c.benchmark_group("checks");
Expand Down
2 changes: 1 addition & 1 deletion docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ Configuration is read from the following (in precedence order)
| default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. |
| default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. |
| default.identifier-include-chars | \- | string | Allow identifiers to include these characters. |
| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
| default.extend-identifiers | \- | table of strings | Corrections for identifiers. When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
| default.extend-words | \- | table of strings | Corrections for words. When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
17 changes: 4 additions & 13 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,6 @@ pub(crate) struct FileArgs {
#[structopt(long, overrides_with("no-check-files"), hidden(true))]
check_files: bool,

#[structopt(long, overrides_with("hex"))]
/// Don't try to detect that an identifier looks like hex
no_hex: bool,
#[structopt(long, overrides_with("no-hex"), hidden(true))]
hex: bool,

#[structopt(
long,
possible_values(&config::Locale::variants()),
Expand Down Expand Up @@ -163,15 +157,12 @@ impl config::FileSource for FileArgs {
}
}

fn ignore_hex(&self) -> Option<bool> {
match (self.hex, self.no_hex) {
(true, false) => Some(true),
(false, true) => Some(false),
(false, false) => None,
(_, _) => unreachable!("StructOpt should make this impossible"),
}
fn dict(&self) -> Option<&dyn config::DictSource> {
Some(self)
}
}

impl config::DictSource for FileArgs {
fn locale(&self) -> Option<config::Locale> {
self.locale
}
Expand Down
240 changes: 162 additions & 78 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ pub trait FileSource {
None
}

fn tokenizer(&self) -> Option<&dyn TokenizerSource> {
None
}

fn dict(&self) -> Option<&dyn DictSource> {
None
}
}

pub trait TokenizerSource {
/// Do not check identifiers that appear to be hexadecimal values.
fn ignore_hex(&self) -> Option<bool> {
None
Expand All @@ -82,7 +92,9 @@ pub trait FileSource {
fn identifier_include_chars(&self) -> Option<&str> {
None
}
}

pub trait DictSource {
fn locale(&self) -> Option<Locale> {
None
}
Expand Down Expand Up @@ -258,14 +270,10 @@ pub struct FileConfig {
pub binary: Option<bool>,
pub check_filename: Option<bool>,
pub check_file: Option<bool>,
pub ignore_hex: Option<bool>,
pub identifier_leading_digits: Option<bool>,
pub identifier_leading_chars: Option<kstring::KString>,
pub identifier_include_digits: Option<bool>,
pub identifier_include_chars: Option<kstring::KString>,
pub locale: Option<Locale>,
pub extend_identifiers: HashMap<kstring::KString, kstring::KString>,
pub extend_words: HashMap<kstring::KString, kstring::KString>,
#[serde(flatten)]
pub tokenizer: Option<TokenizerConfig>,
#[serde(flatten)]
pub dict: Option<DictConfig>,
}

impl FileConfig {
Expand All @@ -275,18 +283,12 @@ impl FileConfig {
binary: Some(empty.binary()),
check_filename: Some(empty.check_filename()),
check_file: Some(empty.check_file()),
ignore_hex: Some(empty.ignore_hex()),
identifier_leading_digits: Some(empty.identifier_leading_digits()),
identifier_leading_chars: Some(kstring::KString::from_ref(
empty.identifier_leading_chars(),
)),
identifier_include_digits: Some(empty.identifier_include_digits()),
identifier_include_chars: Some(kstring::KString::from_ref(
empty.identifier_include_chars(),
)),
locale: Some(empty.locale()),
extend_identifiers: Default::default(),
extend_words: Default::default(),
tokenizer: Some(
empty
.tokenizer
.unwrap_or_else(|| TokenizerConfig::from_defaults()),
),
dict: Some(empty.dict.unwrap_or_else(|| DictConfig::from_defaults())),
}
}

Expand All @@ -300,34 +302,22 @@ impl FileConfig {
if let Some(source) = source.check_file() {
self.check_file = Some(source);
}
if let Some(source) = source.ignore_hex() {
self.ignore_hex = Some(source);
if let Some(source) = source.tokenizer() {
let mut tokenizer = None;
std::mem::swap(&mut tokenizer, &mut self.tokenizer);
let mut tokenizer = tokenizer.unwrap_or_default();
tokenizer.update(source);
let mut tokenizer = Some(tokenizer);
std::mem::swap(&mut tokenizer, &mut self.tokenizer);
}
if let Some(source) = source.identifier_leading_digits() {
self.identifier_leading_digits = Some(source);
}
if let Some(source) = source.identifier_leading_chars() {
self.identifier_leading_chars = Some(kstring::KString::from_ref(source));
}
if let Some(source) = source.identifier_include_digits() {
self.identifier_include_digits = Some(source);
if let Some(source) = source.dict() {
let mut dict = None;
std::mem::swap(&mut dict, &mut self.dict);
let mut dict = dict.unwrap_or_default();
dict.update(source);
let mut dict = Some(dict);
std::mem::swap(&mut dict, &mut self.dict);
}
if let Some(source) = source.identifier_include_chars() {
self.identifier_include_chars = Some(kstring::KString::from_ref(source));
}
if let Some(source) = source.locale() {
self.locale = Some(source);
}
self.extend_identifiers.extend(
source
.extend_identifiers()
.map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))),
);
self.extend_words.extend(
source
.extend_words()
.map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))),
);
}

pub fn binary(&self) -> bool {
Expand All @@ -341,61 +331,97 @@ impl FileConfig {
pub fn check_file(&self) -> bool {
self.check_file.unwrap_or(true)
}
}

pub fn ignore_hex(&self) -> bool {
self.ignore_hex.unwrap_or(true)
impl FileSource for FileConfig {
fn binary(&self) -> Option<bool> {
self.binary
}

pub fn identifier_leading_digits(&self) -> bool {
self.identifier_leading_digits.unwrap_or(false)
fn check_filename(&self) -> Option<bool> {
self.check_filename
}

pub fn identifier_leading_chars(&self) -> &str {
self.identifier_leading_chars.as_deref().unwrap_or("_")
fn check_file(&self) -> Option<bool> {
self.check_file
}

pub fn identifier_include_digits(&self) -> bool {
self.identifier_include_digits.unwrap_or(true)
fn tokenizer(&self) -> Option<&dyn TokenizerSource> {
self.tokenizer.as_ref().map(|t| t as &dyn TokenizerSource)
}

pub fn identifier_include_chars(&self) -> &str {
self.identifier_include_chars.as_deref().unwrap_or("_'")
fn dict(&self) -> Option<&dyn DictSource> {
self.dict.as_ref().map(|d| d as &dyn DictSource)
}
}

pub fn locale(&self) -> Locale {
self.locale.unwrap_or_default()
/// Tokenizer-related settings.
///
/// Every field is optional so that a value that was never set (`None`) can be told apart
/// from one that was configured explicitly; the accessor methods of the same name supply
/// the fallback when a field is `None`.
// NOTE(review): this struct is embedded in `FileConfig` through `#[serde(flatten)]`, and the
// serde documentation lists `deny_unknown_fields` as unsupported in combination with
// `flatten` — confirm that unknown keys are still handled as intended.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields, default)]
#[serde(rename_all = "kebab-case")]
pub struct TokenizerConfig {
/// Do not check identifiers that appear to be hexadecimal values.
/// The `ignore_hex()` accessor falls back to `true` when unset.
pub ignore_hex: Option<bool>,
/// Presumably whether identifiers may start with digits (counterpart of
/// `identifier_include_digits`) — TODO confirm against the reference docs.
/// The accessor falls back to `false` when unset.
pub identifier_leading_digits: Option<bool>,
/// Allow identifiers to start with one of these characters.
/// The accessor falls back to `"_"` when unset.
pub identifier_leading_chars: Option<kstring::KString>,
/// Allow identifiers to include digits, in addition to letters.
/// The accessor falls back to `true` when unset.
pub identifier_include_digits: Option<bool>,
/// Allow identifiers to include these characters.
/// The accessor falls back to `"_'"` when unset.
pub identifier_include_chars: Option<kstring::KString>,
}

impl TokenizerConfig {
pub fn from_defaults() -> Self {
let empty = Self::default();
Self {
ignore_hex: Some(empty.ignore_hex()),
identifier_leading_digits: Some(empty.identifier_leading_digits()),
identifier_leading_chars: Some(kstring::KString::from_ref(
empty.identifier_leading_chars(),
)),
identifier_include_digits: Some(empty.identifier_include_digits()),
identifier_include_chars: Some(kstring::KString::from_ref(
empty.identifier_include_chars(),
)),
}
}

pub fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
Box::new(
self.extend_identifiers
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
pub fn update(&mut self, source: &dyn TokenizerSource) {
if let Some(source) = source.ignore_hex() {
self.ignore_hex = Some(source);
}
if let Some(source) = source.identifier_leading_digits() {
self.identifier_leading_digits = Some(source);
}
if let Some(source) = source.identifier_leading_chars() {
self.identifier_leading_chars = Some(kstring::KString::from_ref(source));
}
if let Some(source) = source.identifier_include_digits() {
self.identifier_include_digits = Some(source);
}
if let Some(source) = source.identifier_include_chars() {
self.identifier_include_chars = Some(kstring::KString::from_ref(source));
}
}

pub fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
Box::new(
self.extend_words
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
pub fn ignore_hex(&self) -> bool {
self.ignore_hex.unwrap_or(true)
}
}

impl FileSource for FileConfig {
fn binary(&self) -> Option<bool> {
self.binary
pub fn identifier_leading_digits(&self) -> bool {
self.identifier_leading_digits.unwrap_or(false)
}

fn check_filename(&self) -> Option<bool> {
self.check_filename
pub fn identifier_leading_chars(&self) -> &str {
self.identifier_leading_chars.as_deref().unwrap_or("_")
}

fn check_file(&self) -> Option<bool> {
self.check_file
pub fn identifier_include_digits(&self) -> bool {
self.identifier_include_digits.unwrap_or(true)
}

pub fn identifier_include_chars(&self) -> &str {
self.identifier_include_chars.as_deref().unwrap_or("_'")
}
}

impl TokenizerSource for TokenizerConfig {
fn ignore_hex(&self) -> Option<bool> {
self.ignore_hex
}
Expand All @@ -415,7 +441,65 @@ impl FileSource for FileConfig {
fn identifier_include_chars(&self) -> Option<&str> {
self.identifier_include_chars.as_deref()
}
}

/// Dictionary-related settings: the dialect to correct to plus user-supplied corrections.
///
/// Because of `#[serde(default)]`, fields missing from the input deserialize to their
/// `Default` value (`None` for `locale`, empty maps for the correction tables).
// NOTE(review): this struct is embedded in `FileConfig` through `#[serde(flatten)]`, and the
// serde documentation lists `deny_unknown_fields` as unsupported in combination with
// `flatten` — confirm that unknown keys are still handled as intended.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields, default)]
#[serde(rename_all = "kebab-case")]
pub struct DictConfig {
/// English dialect to correct to. `None` means "not configured"; the `locale()`
/// accessor then falls back to `Locale::default()`.
pub locale: Option<Locale>,
/// Corrections for identifiers, keyed by the identifier. Per the reference docs, a blank
/// correction marks the entry as never valid and a correction equal to the key marks it
/// as always valid.
pub extend_identifiers: HashMap<kstring::KString, kstring::KString>,
/// Corrections for words, keyed by the word. Per the reference docs, a blank correction
/// marks the word as never valid and a correction equal to the key marks it as always
/// valid.
pub extend_words: HashMap<kstring::KString, kstring::KString>,
}

impl DictConfig {
pub fn from_defaults() -> Self {
let empty = Self::default();
Self {
locale: Some(empty.locale()),
extend_identifiers: Default::default(),
extend_words: Default::default(),
}
}

pub fn update(&mut self, source: &dyn DictSource) {
if let Some(source) = source.locale() {
self.locale = Some(source);
}
self.extend_identifiers.extend(
source
.extend_identifiers()
.map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))),
);
self.extend_words.extend(
source
.extend_words()
.map(|(k, v)| (kstring::KString::from_ref(k), kstring::KString::from_ref(v))),
);
}

pub fn locale(&self) -> Locale {
self.locale.unwrap_or_default()
}

pub fn extend_identifiers(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
Box::new(
self.extend_identifiers
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
}

pub fn extend_words(&self) -> Box<dyn Iterator<Item = (&str, &str)> + '_> {
Box::new(
self.extend_words
.iter()
.map(|(k, v)| (k.as_str(), v.as_str())),
)
}
}

impl DictSource for DictConfig {
fn locale(&self) -> Option<Locale> {
self.locale
}
Expand Down
Loading

0 comments on commit 4bbc59f

Please sign in to comment.