Skip to content

Commit

Permalink
Merge pull request #302 from meilisearch/update-version-v0.9.0
Browse files Browse the repository at this point in the history
Update version for the next release (v0.9.0) in Cargo.toml files
  • Loading branch information
ManyTheFish authored Jul 25, 2024
2 parents 81f0a43 + 9daa84b commit 9854134
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 7 deletions.
4 changes: 2 additions & 2 deletions charabia/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "charabia"
version = "0.8.12"
version = "0.9.0"
license = "MIT"
authors = ["Many <[email protected]>"]
edition = "2021"
Expand Down Expand Up @@ -31,7 +31,7 @@ unicode-normalization = "0.1.23"
irg-kvariants = { path = "../irg-kvariants", version = "=0.1.1" }

[features]
default = ["chinese", "hebrew", "japanese", "thai", "korean", "greek", "latin-camelcase", "latin-snakecase", "khmer", "vietnamese"]
default = ["chinese", "hebrew", "japanese", "thai", "korean", "greek", "latin-camelcase", "latin-snakecase", "khmer", "vietnamese", "swedish-recomposition"]

# allow chinese specialized tokenization
chinese = ["chinese-segmentation", "chinese-normalization"]
Expand Down
4 changes: 2 additions & 2 deletions charabia/src/normalizer/arabic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use crate::{Script, Token};
/// - normalizing the arabic Alef 'أ','إ','آ','ٱ' to 'ا'
/// - normalizing the arabic Yeh 'ى' to 'ي'
/// - normalizing the arabic Taa Marbuta 'ة' to 'ه'
/// https://en.wikipedia.org/wiki/Arabic_alphabet
/// https://en.wikipedia.org/wiki/Kashida
/// https://en.wikipedia.org/wiki/Arabic_alphabet
/// https://en.wikipedia.org/wiki/Kashida
pub struct ArabicNormalizer;

Expand Down
2 changes: 1 addition & 1 deletion charabia/src/normalizer/swedish_recomposition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::normalizer::NormalizerOption;
use crate::{Language, Token};

/// Aho-Corasick matcher over six decomposed letter sequences: `A`/`a` followed by
/// U+030A (combining ring above), and `A`/`a`/`O`/`o` followed by U+0308
/// (combining diaeresis) — the canonical decompositions of Å/å, Ä/ä and Ö/ö.
///
/// Wrapped in `Lazy`, so the automaton is presumably built once on first access.
/// The `unwrap` panics if the automaton cannot be constructed from these patterns.
static MATCHING_STR: Lazy<AhoCorasick> = Lazy::new(|| {
AhoCorasick::new(&["A\u{30a}", "a\u{30a}", "A\u{308}", "a\u{308}", "O\u{308}", "o\u{308}"])
AhoCorasick::new(["A\u{30a}", "a\u{30a}", "A\u{308}", "a\u{308}", "O\u{308}", "o\u{308}"])
.unwrap()
});

Expand Down
4 changes: 2 additions & 2 deletions charabia/src/separators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
/// - Zl Line Separator
/// - Zp Paragraph Separator
/// - Zs Space Separator
/// plus "\0", ". ", ", " and ។ល។" (៘ decomposition) to categorize them as hard separators
/// and "`" to understand markdown formatted text
/// plus "\0", ". ", ", " and "។ល។" (៘ decomposition) to categorize them as hard separators
/// and "`" to understand markdown formatted text
#[rustfmt::skip]
pub const DEFAULT_SEPARATORS: &[&str] = &[
"\0", ". ", ", ", "_", "‿", "⁀", "⁔", "︳", "︴", "﹍", "﹎", "﹏", "_", "-", "֊", "־", "᐀", "᠆", "‐", "‒", "–",
Expand Down

0 comments on commit 9854134

Please sign in to comment.