Skip to content

Commit

Permalink
Generalize the realText algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
tzhuan committed Oct 2, 2014
1 parent 2e48342 commit 6af3cce
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 93 deletions.
45 changes: 37 additions & 8 deletions src/Faker/Provider/Text.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
abstract class Text extends \Faker\Provider\Base
{
protected static $baseText = '';
protected static $separator = ' ';
protected static $separatorLen = 1;
protected $explodedText = null;
protected $consecutiveWords = array();

Expand Down Expand Up @@ -37,6 +39,7 @@ public function realText($maxNbChars = 200, $indexSize = 2)
throw new \InvalidArgumentException('indexSize must be at most 5');
}


$words = $this->getConsecutiveWords($indexSize);
$result = array();
$resultLength = 0;
Expand All @@ -47,28 +50,28 @@ public function realText($maxNbChars = 200, $indexSize = 2)
$word = static::randomElement($words[$next]);

// calculate next index
$currentWords = explode(' ', $next);
$currentWords = static::explode($next);
$currentWords[] = $word;
array_shift($currentWords);
$next = implode(' ', $currentWords);
$next = static::implode($currentWords);

// ensure text starts with an uppercase letter
if ($resultLength == 0 && !preg_match('/^\p{Lu}/u', $word)) {
if ($resultLength == 0 && !static::validStart($word)) {
continue;
}

// append the element
$result[] = $word;
$resultLength += strlen($word) + 1;
$resultLength += static::strlen($word) + static::$separatorLen;
}

// remove the element that caused the text to overflow
array_pop($result);

// build result
$result = implode(' ', $result);
$result = static::implode($result);

return $result.'.';
return static::appendEnd($result);
}

protected function getConsecutiveWords($indexSize)
Expand All @@ -82,7 +85,7 @@ protected function getConsecutiveWords($indexSize)
}

for ($i = 0, $count = count($parts); $i < $count; $i++) {
$stringIndex = implode(' ', $index);
$stringIndex = static::implode($index);
if (!isset($words[$stringIndex])) {
$words[$stringIndex] = array();
}
Expand All @@ -101,9 +104,35 @@ protected function getConsecutiveWords($indexSize)
protected function getExplodedText()
{
if ($this->explodedText === null) {
$this->explodedText = explode(' ', preg_replace('/\s+/u', ' ', static::$baseText));
$this->explodedText = static::explode(preg_replace('/\s+/u', ' ', static::$baseText));
}

return $this->explodedText;
}

/**
 * Splits a text into its individual words.
 *
 * The delimiter is read through late static binding, so a subclass that
 * redefines $separator changes how the text is cut. Note that PHP's
 * native explode() rejects an empty delimiter; a subclass using an empty
 * separator therefore has to override this method as well.
 *
 * @param string $text the text to split
 *
 * @return array the words of $text, in their original order
 */
protected static function explode($text)
{
    $delimiter = static::$separator;

    return explode($delimiter, $text);
}

/**
 * Joins a list of words back into a single string.
 *
 * This is the counterpart of explode(): the words are glued together with
 * the separator resolved through late static binding, so subclasses that
 * redefine $separator get their own glue.
 *
 * @param array $words the words to join
 *
 * @return string the words concatenated with the separator between them
 */
protected static function implode($words)
{
    $glue = static::$separator;

    return implode($glue, $words);
}

/**
 * Returns the length of a word, measured in characters.
 *
 * The native strlen() counts bytes, which over-counts every non-ASCII
 * character in UTF-8 text (accented letters, CJK, ...) and makes the
 * generated text shorter than the requested number of characters. When
 * the mbstring extension is available the length is therefore computed
 * per character; otherwise the byte length is used as a best-effort
 * fallback, which is still exact for pure ASCII text.
 *
 * @param string $text the text to measure
 *
 * @return int the number of characters (or bytes when mbstring is missing)
 */
protected static function strlen($text)
{
    if (function_exists('mb_strlen')) {
        // The base texts are matched with the /u modifier elsewhere, so
        // they are treated as UTF-8 here too instead of relying on the
        // configured internal encoding.
        return mb_strlen($text, 'UTF-8');
    }

    // Leading backslash: call the global function, not this method's namesake.
    return \strlen($text);
}

/**
 * Tells whether a word may be used as the first word of a generated text.
 *
 * A word qualifies when its first character is a Unicode uppercase letter.
 * The raw preg_match() result is returned unchanged, so callers should
 * only test it for truthiness.
 *
 * @param string $word the candidate first word
 *
 * @return int|false 1 when the word starts with an uppercase letter,
 *                   0 when it does not, false if the match failed
 */
protected static function validStart($word)
{
    $startsWithUppercase = preg_match('/^\p{Lu}/u', $word);

    return $startsWithUppercase;
}

/**
 * Terminates a generated text with its closing punctuation.
 *
 * The default implementation closes the text with a full stop.
 *
 * @param string $text the generated text, without final punctuation
 *
 * @return string the text followed by a period
 */
protected static function appendEnd($text)
{
    $terminated = $text . '.';

    return $terminated;
}

}
96 changes: 11 additions & 85 deletions src/Faker/Provider/zh_TW/Text.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

class Text extends \Faker\Provider\Text
{
protected $explodedText = null;
protected $consecutiveWords = array();
protected static $separator = '';
protected static $separatorLen = 0;
protected static $punct = array('', '', '', '', '', '', '', '', '', '');

/**
* Title: 三國演義 Romance of the Three Kingdoms
Expand Down Expand Up @@ -77,98 +78,23 @@ class Text extends \Faker\Provider\Text
三人飛馬引軍而出。張角正殺敗董卓,乘勢趕來,忽遇三人衝殺,角軍大亂,敗走五十餘里。三人救了董卓回寨。卓問三人現居何職。玄德曰:「白身。」卓甚輕之,不為禮。玄德出,張飛大怒曰:「我等親赴血戰,救了這廝,他卻如此無禮;若不殺之,難消我氣!」便要提刀入帳來殺董卓。正是:人情勢利古猶今,誰識英雄是白身?安得快人如翼德,盡誅世上負心人!畢竟董卓性命如何,且看下文分解。
EOT;

public function realText($maxNbChars = 200, $indexSize = 2)
protected static function explode($text)
{
if ($maxNbChars < 10) {
throw new \InvalidArgumentException('maxNbChars must be at least 10');
}
if ($indexSize < 1) {
throw new \InvalidArgumentException('indexSize must be at least 1');
}
if ($indexSize > 5) {
throw new \InvalidArgumentException('indexSize must be at most 5');
}

$words = $this->getConsecutiveWords($indexSize);
$result = array();
$resultLength = 0;
// take a random starting point
$punct = array('', '', '', '', '', '', '', '', '', '');
$next = static::randomKey($words);
while ($resultLength < $maxNbChars && isset($words[$next])) {
// fetch a random word to append
$word = static::randomElement($words[$next]);

// calculate next index
$currentWords = static::split($next);
$currentWords[] = $word;
array_shift($currentWords);
$next = implode('', $currentWords);

// ensure the first word is not punctuation
if ($resultLength === 0 and in_array($word, $punct)) {
continue;
}

// append the element
$result[] = $word;
$resultLength += static::strlen($word);
}

// remove the element that caused the text to overflow
array_pop($result);

// build result
$result = implode('', $result);

return $result.static::randomElement(array('', '', '',));
}

protected function getConsecutiveWords($indexSize)
{
if (!isset($this->consecutiveWords[$indexSize])) {
$parts = $this->getExplodedText();
$words = array();
$index = array();
for ($i = 0; $i < $indexSize; $i++) {
$index[] = array_shift($parts);
}

for ($i = 0, $count = count($parts); $i < $count; $i++) {
$stringIndex = implode('', $index);
if (!isset($words[$stringIndex])) {
$words[$stringIndex] = array();
}
$word = $parts[$i];
$words[$stringIndex][] = $word;
array_shift($index);
$index[] = $word;
}
// cache look up words for performance
$this->consecutiveWords[$indexSize] = $words;
}

return $this->consecutiveWords[$indexSize];
return array_values(array_filter(preg_split('//u', preg_replace('/\s+/', '', $text))));
}

protected function getExplodedText()
protected static function strlen($text)
{
if ($this->explodedText === null) {
$this->explodedText = static::split(static::$baseText);
}
return $this->explodedText;
return function_exists('mb_get_info') ? mb_strlen($text) : count(static::split($text));
}

public static function split($text)
protected static function validStart($word)
{
return array_values(array_filter(preg_split('//u', preg_replace('/\s+/', '', $text))));
return !in_array($word, static::$punct);
}

public static function strlen($text)
protected static function appendEnd($text)
{
if (function_exists('mb_get_info')) {
return mb_strlen($text);
}
return count(static::split($text));
return $text.static::randomElement(array('', '', '',));
}
}

0 comments on commit 6af3cce

Please sign in to comment.