Skip to content

Commit

Permalink
Merge pull request #164 from epage/perf
Browse files Browse the repository at this point in the history
perf: Avoid hashing
  • Loading branch information
epage authored Nov 11, 2020
2 parents b44ab02 + 6bdbd82 commit 36709b6
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 17 deletions.
26 changes: 24 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ toml = "0.5"
log = "0.4"
env_logger = "0.8"
bstr = "0.2"
ahash = "0.5.8"

[dev-dependencies]
assert_fs = "1.0"
Expand Down
8 changes: 8 additions & 0 deletions crates/typos-dict/codegen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ fn generate<W: std::io::Write>(file: &mut W) {
writeln!(file).unwrap();
writeln!(file, "use unicase::UniCase;").unwrap();

let mut smallest = usize::MAX;
let mut largest = usize::MIN;

writeln!(
file,
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
Expand All @@ -26,12 +29,17 @@ fn generate<W: std::io::Write>(file: &mut W) {
.map(|r| r.unwrap())
.collect();
for record in &records {
smallest = std::cmp::min(smallest, record[0].len());
largest = std::cmp::max(largest, record[0].len());
let value = format!(r#""{}""#, &record[1]);
builder.entry(unicase::UniCase::new(&record[0]), &value);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();
writeln!(file).unwrap();
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();
}

#[derive(Debug, StructOpt)]
Expand Down
3 changes: 3 additions & 0 deletions crates/typos-dict/src/dict_codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33648,3 +33648,6 @@ pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static st
(UniCase::ascii("presumpton"), "presumption"),
]),
};

/// Smallest `len()` of any key in `WORD_DICTIONARY`; written by the codegen alongside the map.
pub const WORD_MIN: usize = 3;
/// Largest `len()` of any key in `WORD_DICTIONARY`; written by the codegen alongside the map.
pub const WORD_MAX: usize = 19;
9 changes: 9 additions & 0 deletions crates/typos-vars/codegen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
writeln!(file, "}}").unwrap();
writeln!(file).unwrap();

let mut smallest = usize::MAX;
let mut largest = usize::MIN;

writeln!(
file,
"pub static VARS_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [(u8, &VariantsMap)]> = "
Expand All @@ -92,11 +95,17 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
referenced_symbols.extend(data.iter().map(|(s, _)| s));
let value = generate_link(&data);
builder.entry(unicase::UniCase::new(word), &value);
smallest = std::cmp::min(smallest, word.len());
largest = std::cmp::max(largest, word.len());
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
writeln!(file, ";").unwrap();

writeln!(file).unwrap();
writeln!(file, "pub const WORD_MIN: usize = {};", smallest).unwrap();
writeln!(file, "pub const WORD_MAX: usize = {};", largest).unwrap();

for (symbol, entry) in entries.iter() {
if !referenced_symbols.contains(symbol.as_str()) {
continue;
Expand Down
3 changes: 3 additions & 0 deletions crates/typos-vars/src/vars_codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113081,6 +113081,9 @@ pub static VARS_DICTIONARY: phf::Map<
),
]),
};

/// Smallest `len()` of any key in `VARS_DICTIONARY`; written by the codegen alongside the map.
pub const WORD_MIN: usize = 2;
/// Largest `len()` of any key in `VARS_DICTIONARY`; written by the codegen alongside the map.
pub const WORD_MAX: usize = 24;
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];

Expand Down
53 changes: 38 additions & 15 deletions src/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,24 @@ impl BuiltIn {

/// Looks `word` up in the built-in misspelling dictionary.
///
/// Returns the bare correction instead of a `Status` to avoid the allocations.
fn correct_with_dict(&self, word: &str) -> Option<&'static str> {
    const WORD_RANGE: std::ops::RangeInclusive<usize> =
        typos_dict::WORD_MIN..=typos_dict::WORD_MAX;
    // `WORD_MIN`/`WORD_MAX` are the shortest and longest key lengths in the generated map, so
    // a word whose length falls outside that range is reported as a miss without hashing it.
    if WORD_RANGE.contains(&word.len()) {
        map_lookup(&typos_dict::WORD_DICTIONARY, word)
    } else {
        None
    }
}

/// Looks `word` up in the built-in variants dictionary and, on a hit, resolves the entry
/// through `select_variant`.
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
    const WORD_RANGE: std::ops::RangeInclusive<usize> =
        typos_vars::WORD_MIN..=typos_vars::WORD_MAX;
    // `WORD_MIN`/`WORD_MAX` are the shortest and longest key lengths in the generated map, so
    // a word whose length falls outside that range is reported as a miss without hashing it.
    if WORD_RANGE.contains(&word.len()) {
        map_lookup(&typos_vars::VARS_DICTIONARY, word)
            .map(|variants| self.select_variant(variants))
    } else {
        None
    }
}

fn select_variant(
Expand Down Expand Up @@ -144,8 +157,8 @@ fn case_correct(correction: &mut Cow<'_, str>, case: Case) {
}

pub struct Override<'i, 'w, D> {
identifiers: HashMap<&'i str, Status<'i>>,
words: HashMap<unicase::UniCase<&'w str>, Status<'w>>,
identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>,
words: HashMap<unicase::UniCase<&'w str>, Status<'w>, ahash::RandomState>,
inner: D,
}

Expand All @@ -168,7 +181,7 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
.collect();
}

pub fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
cases: I,
) -> impl Iterator<Item = (&'z str, Status<'z>)> {
cases.map(|(typo, correction)| {
Expand All @@ -186,19 +199,29 @@ impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {

impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
    /// Returns the override registered for `ident`, or whatever the wrapped dictionary
    /// reports when there is none.
    fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Option<Status<'s>> {
        // Skip hashing if we can: with no overrides registered the lookup cannot hit, but the
        // wrapped dictionary must still be consulted.
        if self.identifiers.is_empty() {
            return self.inner.correct_ident(ident);
        }
        self.identifiers
            .get(ident.token())
            .map(|c| c.borrow())
            .or_else(|| self.inner.correct_ident(ident))
    }

    /// Returns the override registered for `word`, or whatever the wrapped dictionary
    /// reports when there is none.
    fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Option<Status<'s>> {
        // Skip hashing if we can: with no overrides registered the lookup cannot hit, but the
        // wrapped dictionary must still be consulted.
        if self.words.is_empty() {
            return self.inner.correct_word(word);
        }
        let w = UniCase::new(word.token());
        // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
        self.words
            .get(&w)
            .cloned()
            .or_else(|| self.inner.correct_word(word))
    }
}

Expand Down

0 comments on commit 36709b6

Please sign in to comment.