Skip to content

Commit

Permalink
Merge pull request #3 from DaveChild/patch-1
Browse files Browse the repository at this point in the history
Added new "probabilities" method
  • Loading branch information
noogen authored Jul 8, 2019
2 parents 4a19e4e + 28236ec commit d41ab39
Showing 1 changed file with 29 additions and 6 deletions.
35 changes: 29 additions & 6 deletions src/Bayes.php
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,32 @@ public function categorize($text)
$maxProbability = -INF;
$chosenCategory = null;

if ($self->totalDocuments > 0) {
$probabilities = $self->probabilities($text);

// iterate through our categories to find the one with the maximum
// log-probability for this text
foreach ($probabilities as $category => $logProbability) {
if ($logProbability > $maxProbability) {
$maxProbability = $logProbability;
$chosenCategory = $category;
}
}
}

return $chosenCategory;
}

/**
* Compute the log-probability of the given text for each known category
* @param string $text
* @return array log-probabilities keyed by category; an empty array (never null) when totalDocuments is 0
*/
public function probabilities($text)
{
$self = $this;
$probabilities = [];

if ($self->totalDocuments > 0) {
$tokens = ($self->tokenizer)($text);
$frequencyTable = $self->frequencyTable($tokens);
Expand All @@ -222,15 +248,12 @@ public function categorize($text)
// determine the log of the P( w | c ) for this word
$logProbability += $frequencyInText * log($tokenProbability);
}

if ($logProbability > $maxProbability) {
$maxProbability = $logProbability;
$chosenCategory = $category;
}

$probabilities[$category] = $logProbability;
}
}

return $chosenCategory;
return $probabilities;
}

/**
Expand Down

0 comments on commit d41ab39

Please sign in to comment.