From 7a02afd4bf5870608a6f4b43220a49d547eda1fa Mon Sep 17 00:00:00 2001 From: Watheq Alshowaiter Date: Sun, 1 Sep 2024 22:08:41 +0300 Subject: [PATCH] feat: add isFemale parameter --- docs/classes/ArPHP-I18N-Arabic.html | 13 +++++- examples/numbers.php | 7 +++- src/Arabic.php | 62 +++++++++++++++-------------- tests/ArabicTest.php | 6 ++- 4 files changed, 54 insertions(+), 34 deletions(-) diff --git a/docs/classes/ArPHP-I18N-Arabic.html b/docs/classes/ArPHP-I18N-Arabic.html index 56347bd..cad4dbd 100644 --- a/docs/classes/ArPHP-I18N-Arabic.html +++ b/docs/classes/ArPHP-I18N-Arabic.html @@ -856,7 +856,6 @@

public ar2en(string $string[, string $standard = 'UNGEGN' ]) : string -

Parameters
@@ -1061,7 +1060,7 @@

public - arPlural(string $singular, int $count[, string $plural2 = null ][, string $plural3 = null ][, string $plural4 = null ]) : string + arPlural( string  $singular , int  $count [ , string  $plural2 = null ] [ , string  $plural3 = null ] [ , string  $plural4 = null ] [ , bool  $nameOnly = false ] [ , bool  $isFemale = null ] ) : string
@@ -1123,6 +1122,16 @@

Parameters
+
+ $isFemale + : bool + = null
+
+

$isFemale explicitly states whether the word is feminine. When it is null (the default), the gender is detected automatically by the library. For example, خطأ is wrongly detected as feminine by the detection algorithm; passing false explicitly marks the word as not feminine and fixes the output.

+
+ +
+ diff --git a/examples/numbers.php b/examples/numbers.php index 6987abf..1a350a6 100644 --- a/examples/numbers.php +++ b/examples/numbers.php @@ -226,7 +226,12 @@ $number = 4; - $text = $Arabic->arPlural('يوم', $number, nameOnly: true); + $text = $Arabic->arPlural('يوم', $number, nameOnly: true); // str_replace('%d', $number, $text) is redundant in this case + + echo "

$number $text

"; + + $number = 1; + $text = $Arabic->arPlural('خطأ', $number, isFemale: false); $text = str_replace('%d', $number, $text); echo "

$number $text

"; diff --git a/src/Arabic.php b/src/Arabic.php index e3656c1..a137206 100644 --- a/src/Arabic.php +++ b/src/Arabic.php @@ -377,13 +377,13 @@ class Arabic /** @var array */ private $arGapPenalty = array(); - + /** @var float */ private $keyboardWeight = 1; - + /** @var float */ private $graphicWeight = 1; - + /** @var float */ private $phoneticWeight = 1; @@ -414,7 +414,7 @@ public function __construct() $this->arDialectInit(); $this->arSimilarityInit(); } - + /** @return void */ private function arSpellerInit() { $this->speller = new \ArPHP\MZK\Speller(); @@ -424,7 +424,7 @@ private function arSpellerInit() { * Spell Check * * @param string $text Text input - * @return array + * @return array * @author Moutaz Alkhatib */ public function spellGetMisspelled($text) { @@ -4232,13 +4232,15 @@ public function volc($olc, $codeLength = 10) * @return string Proper plural form of the given singular form * @author Khaled Al-Sham'aa */ - public function arPlural($singular, $count, $plural2 = null, $plural3 = null, $plural4 = null, $nameOnly = false) + public function arPlural($singular, $count, $plural2 = null, $plural3 = null, $plural4 = null, $nameOnly = false, $isFemale = null) { + $isFemale = $isFemale === null ? $this->isFemale($singular) : $isFemale; + if ($count == 0) { $plural = is_null($plural2) ? $this->arPluralsForms[$singular][0] : "لا $plural3"; - } elseif ($count == 1 && $this->isFemale($singular)) { + } elseif ($count == 1 && $isFemale) { $plural = is_null($plural2) ? $this->arPluralsForms[$singular][1] : "$singular واحدة"; - } elseif ($count == 1 && !$this->isFemale($singular)) { + } elseif ($count == 1 && !$isFemale) { $plural = is_null($plural2) ? $this->arPluralsForms[$singular][1] : "$singular واحد"; } elseif ($count == 2) { $plural = is_null($plural2) ? 
$this->arPluralsForms[$singular][2] : $plural2; @@ -4471,7 +4473,7 @@ public function arDialect($text) } $score = max($scoreEgyptian, $scoreLevantine, $scoreMaghrebi, $scorePeninsular); - + switch ($score) { case $scoreEgyptian: $dialect = 'Egyptian'; @@ -4845,10 +4847,10 @@ private function arKeyboardSimilarity($chr1, $chr2) // shift key status (0/1 if pressed) $zi = $this->arKeyZ["$chr1"]; $zj = $this->arKeyZ["$chr2"]; - + // similarity score $score = 0; - + if ($yi == $yj && $xi == $xj) { // the same key + shift status penalty if differ $score = 8 - 4 * abs($zi - $zj); @@ -4862,10 +4864,10 @@ private function arKeyboardSimilarity($chr1, $chr2) // up or down + shift status penalty if differ $score = 2 - 1 * abs($zi - $zj); } - + return $score; } - + private function arGraphicSimilarity($chr1, $chr2) { if (!array_key_exists($chr1, $this->arGraphGroup) || !array_key_exists($chr2, $this->arGraphGroup)) { @@ -4873,7 +4875,7 @@ private function arGraphicSimilarity($chr1, $chr2) } else { $chr1Group = $this->arGraphGroup["$chr1"]; $chr2Group = $this->arGraphGroup["$chr2"]; - + if ($chr1 == $chr2) { $score = 8; } elseif ($chr1Group == $chr2Group) { @@ -4882,10 +4884,10 @@ private function arGraphicSimilarity($chr1, $chr2) $score = 0; } } - + return $score; } - + private function arSoundSimilarity($chr1, $chr2) { if ($chr1 == $chr2) { @@ -4895,17 +4897,17 @@ private function arSoundSimilarity($chr1, $chr2) } else { $chr1Group = $this->arSoundGroup["$chr1"]; $chr2Group = $this->arSoundGroup["$chr2"]; - + if ($chr1Group == $chr2Group) { $score = 4; } else { $score = 0; } } - + return $score; } - + // the similarity score of characters a and b (keyboard, graphic, phonetic) private function S($chr1, $chr2) { @@ -4917,7 +4919,7 @@ private function S($chr1, $chr2) return $score; } - + // gap penalty scores (for each character) private function d($chr) { @@ -4926,10 +4928,10 @@ private function d($chr) } else { $score = 8; } - + return -1 * $score; } - + // 
https://en.wikipedia.org/wiki/Needleman-Wunsch_algorithm // Needleman-Wunsch algorithm using weighted scoring matrices and gap penalty private function arSimilarityScore($string1, $string2) @@ -4950,36 +4952,36 @@ private function arSimilarityScore($string1, $string2) $chr = mb_substr($string2, $j-1, 1); $F[0][$j] = $this->d($chr) + $F[0][$j-1]; } - + for ($i = 1; $i <= $max1; $i++) { for ($j = 1; $j <= $max2; $j++) { $A = mb_substr($string1, $i-1, 1); $B = mb_substr($string2, $j-1, 1); - + $match = $F[$i-1][$j-1] + $this->S($A, $B); $delete = $F[$i-1][$j] + $this->d($A); $insert = $F[$i][$j-1] + $this->d($B); - + $F[$i][$j] = max($match, $delete, $insert); } } $score = $F[$max1][$max2]; - + return $score; } - + // Calculate the similarity between two Arabic strings public function similar_text($string1, $string2, &$percent = null) { $score = $this->arSimilarityScore($string1, $string2); $score1 = $this->arSimilarityScore($string1, $string1); $score2 = $this->arSimilarityScore($string2, $string2); - + $percent = 100 * $score / max($score1, $score2); - + return $score/8; } - + public function setSimilarityWeight($source, $value = 1) { switch ($source) { diff --git a/tests/ArabicTest.php b/tests/ArabicTest.php index df47725..670a449 100644 --- a/tests/ArabicTest.php +++ b/tests/ArabicTest.php @@ -1105,11 +1105,15 @@ public function testArabicPluralForms(): void $actual[] = str_replace('%d', $number, $text); $number = 7; - $expected[] = 'أيام'; $text = $Arabic->arPlural('يوم', $number, nameOnly: true); $actual[] = $text; // str_replace('%d', $number, $text) is redundant in this case + $number = 1; + $expected[] = 'خطأ واحد'; + $text = $Arabic->arPlural('خطأ', 1, 'خطآن', 'أخطاء', 'خطأ', isFemale: false); + $actual[] = str_replace('%d', $number, $text); + $this->assertEquals($actual, $expected); }