From ebd44a72ffadeefb5bd6e7b1881345e6e611a9b1 Mon Sep 17 00:00:00 2001 From: "quot;brussens" Date: Thu, 19 Feb 2015 09:38:22 +0300 Subject: [PATCH] =?UTF-8?q?=D0=92=D0=B5=D1=80=D1=81=D0=B8=D1=8F=200.2.0?= =?UTF-8?q?=CE=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - #16: Добавлена фильтрация содержимого сообщения (@brussens) - #15: Добавлен раздел спам сообщений (@brussens) - #9: Реализованы пометки писем, как "спам" (@brussens) - #5: Реализовано удаление сообщений пользователями (@brussens) - #12: Добавлен антимат фильтр (@brussens) - #17: Исправлены неверные счётчики при удалении сообщений (@brussens) + некоторые мелкие правки --- CHANGELOG.md | 18 + MessageModule.php | 14 +- README.md | 3 +- assets/MessageAssets.php | 14 +- assets/less/message.less | 25 +- assets/web/css/message.css | 2 +- controllers/MessageBackendController.php | 6 +- controllers/MessageController.php | 201 +- forms/ComposeForm.php | 37 +- install/message.php | 12 +- .../m000000_000000_message_base.php | 7 +- .../m150219_1424325633_add_spam.php | 37 + messages/ru/message.php | 19 +- models/Message.php | 64 +- vendor/php-censure/Censure.php | 359 ++ vendor/php-censure/ReflectionTypeHint.php | 184 + .../ReflectionTypeHint_example.php | 23 + vendor/php-censure/UTF8-CHANGELOG.txt | 65 + vendor/php-censure/UTF8.php | 4072 +++++++++++++++++ views/layouts/message.php | 29 +- views/message/_inbox.php | 27 +- views/message/_outbox.php | 19 +- views/message/_spam.php | 50 + views/message/compose.php | 9 +- views/message/inbox.php | 6 +- views/message/outbox.php | 6 +- views/message/spam.php | 28 + views/message/view.php | 6 +- views/messageBackend/_form.php | 6 +- views/messageBackend/_search.php | 6 +- views/messageBackend/create.php | 6 +- views/messageBackend/index.php | 6 +- views/messageBackend/update.php | 6 +- views/messageBackend/view.php | 6 +- 34 files changed, 5277 insertions(+), 101 deletions(-) create mode 100644 CHANGELOG.md create mode 
100644 install/migrations/m150219_1424325633_add_spam.php create mode 100644 vendor/php-censure/Censure.php create mode 100644 vendor/php-censure/ReflectionTypeHint.php create mode 100644 vendor/php-censure/ReflectionTypeHint_example.php create mode 100644 vendor/php-censure/UTF8-CHANGELOG.txt create mode 100644 vendor/php-censure/UTF8.php create mode 100644 views/message/_spam.php create mode 100644 views/message/spam.php diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d330b2f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ +CHANGELOG | История изменений +=================== + +Версия 0.2.0β 19 февраля 2015 +------------------------------------- +- #16: Добавлена фильтрация содержимого сообщения (@brussens) +- #15: Добавлен раздел спам сообщений (@brussens) +- #9: Реализованы пометки писем, как "спам" (@brussens) +- #5: Реализовано удаление сообщений пользователями (@brussens) +- #12: Добавлен антимат фильтр (@brussens) +- #17: Исправлены неверные счётчики при удалении сообщений (@brussens) + +Версия 0.2.0α 6 февраля 2015 +------------------------------------- +- #11: Исправлена ошибка при пометке письма "прочитанным" (@brussens) +- #10: Стили переведены в LESS (@brussens) +- #6: Добавлена проверка доступа к контроллеру (@brussens, @UnQuai) +- #7: Реализованы административные контроллеры (@brussens) \ No newline at end of file diff --git a/MessageModule.php b/MessageModule.php index d7037ad..a3e9aa1 100644 --- a/MessageModule.php +++ b/MessageModule.php @@ -2,9 +2,9 @@ /** * Class Message - Message module mail class. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. 
* @package yupe.modules.message * @since 0.1α * @@ -12,7 +12,9 @@ class MessageModule extends \yupe\components\WebModule { - const VERSION = '0.2.0α'; + const VERSION = '0.2.0β'; + + public $censure = true; public function getDependencies() { @@ -75,12 +77,12 @@ public function getAuthor() public function getAuthorEmail() { - return 'brussens@hoswac.ru'; + return 'brussens@nativeweb.ru'; } public function getUrl() { - return 'http://hoswac.ru'; + return 'http://nativeweb.ru'; } public function getVersion() diff --git a/README.md b/README.md index c739ecc..137c360 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Приватные сообщения Модуль приватных сообщений для [!ЮПИ](http://yupe.ru) CMS -Текущая версия 0.2.0α +Текущая версия 0.2.0β ## 1. Установка * Скачиваем и распаковываем модуль в /protected/modules/message<содержимое папки yupe-message-master>. @@ -13,6 +13,7 @@ По умолчанию URL адреса имеют следующие значения: * /inbox - Входящие письма * /outbox - Исходящие письма +* /spam - Письма, помеченные спам * /compose - Новое письмо * /pm Просмотр сообщения diff --git a/assets/MessageAssets.php b/assets/MessageAssets.php index d1193d5..bdd6fe3 100644 --- a/assets/MessageAssets.php +++ b/assets/MessageAssets.php @@ -2,9 +2,9 @@ /** * Class MessageAssets - register module assets. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. 
* @package yupe.modules.message.assets * @since 0.2.0α * @@ -17,12 +17,15 @@ class MessageAssets const PACKAGE_ID = 'Message-module'; public $css = [ - 'css/message.css' + 'css/message.css', + 'emoji/jquery.emojiarea.css' ]; public $js = [ 'js/moment/moment.min.js', - 'js/timeago.js' + 'js/timeago.js', + 'emoji/jquery.emojiarea.min.js', + 'emoji/packs/basic/emojis.js' ]; public $depends = [ @@ -45,6 +48,7 @@ public function publish() ->registerScript( $this->getId(), 'jQuery(".timeago").dateFormat(); + jQuery("[data-action=\"data-href\"]").on("click", function(){ window.location=$(this).attr("data-href"); });', diff --git a/assets/less/message.less b/assets/less/message.less index 5679350..d4209ec 100644 --- a/assets/less/message.less +++ b/assets/less/message.less @@ -1,4 +1,12 @@ /** LEFT NAVIGATION **/ + +/* Micro ClearFix Mixin */ +.clearfix{ + zoom:1; + &:before, &:after{ content:""; display:table; } + &:after{ clear: both; } +} + #message-nav { & > li { & > a { @@ -44,25 +52,30 @@ border:1px solid #efefef; cursor:pointer; background:#f5f5f5; - & > .message-preview-remove { + & > .message-preview-actions { display:block; } } - .message-preview-remove { - display:none; + .message-preview-action { background:none; border:none; - position:absolute; - top:5px; - right:10px; font-size:18px; color:#aaaaaa; + margin:0 5px; &:hover, &:focus, &:active { color: #d9534f; } } + .message-preview-actions { + .clearfix; + display:none; + position:absolute; + top:5px; + right:10px; + } + & + .message-preview-item { border-top:none; } diff --git a/assets/web/css/message.css b/assets/web/css/message.css index 2df2700..7629c4d 100644 --- a/assets/web/css/message.css +++ b/assets/web/css/message.css @@ -1 +1 @@ -#message-nav>li>a{padding:6px 12px}#message-nav>li>a>.label{background:#d9534f;display:block}#message-nav>li.active>a>.label{background:#fff;color:#428bca}.message-wrapper .list-view{padding-top:0}.message-wrapper .message-preview-item{border:1px solid 
#efefef;font-size:12px;position:relative}.message-wrapper .message-preview-item table td{height:50px}.message-wrapper .message-preview-item:hover,.message-wrapper .message-preview-item:focus,.message-wrapper .message-preview-item:active{border:1px solid #efefef;cursor:pointer;background:#f5f5f5}.message-wrapper .message-preview-item:hover>.message-preview-remove,.message-wrapper .message-preview-item:focus>.message-preview-remove,.message-wrapper .message-preview-item:active>.message-preview-remove{display:block}.message-wrapper .message-preview-item .message-preview-remove{display:none;background:0 0;border:0;position:absolute;top:5px;right:10px;font-size:18px;color:#aaa}.message-wrapper .message-preview-item .message-preview-remove:hover,.message-wrapper .message-preview-item .message-preview-remove:focus,.message-wrapper .message-preview-item .message-preview-remove:active{color:#d9534f}.message-wrapper .message-preview-item+.message-preview-item{border-top:0}.message-wrapper .message-preview-item .message-preview-info{padding-left:0}.message-wrapper .message-preview-item .message-preview-info .message-preview-info-time{font-size:10px;color:#666}.message-wrapper .message-preview-item .message-preview-body{padding:5px 15px;border-radius:7px;overflow:hidden;max-height:100%}.message-wrapper .message-preview-item .message-preview-body:before,.message-wrapper .message-preview-item .message-preview-body:after{content:" ";display:table}.message-wrapper .message-preview-item .message-preview-body:after{clear:both}.message-wrapper .message-preview-item .message-preview-body .hide-image{color:#fff;background:#67809f;font-size:12px;padding:3px 6px;border-radius:3px}.message-wrapper .message-preview-item.hot .message-preview-body{background:#c5d9e7;color:#2b5b83}.message-view-wrapper{margin-bottom:15px}.message-view-wrapper .message-view-date{color:#d9534f;font-size:12px;padding:5px 0}.message-view-wrapper 
.message-view-body{font-size:16px;color:#2b5b83}.compose{padding:10px}.compose .compose-thumb{float:left;width:12%;padding-right:15px}.compose .compose-textarea{float:left;width:88%} \ No newline at end of file +.clearfix{zoom:1}.clearfix:after,.clearfix:before{content:"";display:table}.clearfix:after{clear:both}#message-nav>li>a{padding:6px 12px}#message-nav>li>a>.label{background:#d9534f;display:block}#message-nav>li.active>a>.label{background:#fff;color:#428bca}.message-wrapper .list-view{padding-top:0}.message-wrapper .message-preview-item{border:1px solid #efefef;font-size:12px;position:relative}.message-wrapper .message-preview-item table td{height:50px}.message-wrapper .message-preview-item:active,.message-wrapper .message-preview-item:focus,.message-wrapper .message-preview-item:hover{border:1px solid #efefef;cursor:pointer;background:#f5f5f5}.message-wrapper .message-preview-item:active>.message-preview-actions,.message-wrapper .message-preview-item:focus>.message-preview-actions,.message-wrapper .message-preview-item:hover>.message-preview-actions{display:block}.message-wrapper .message-preview-item .message-preview-action{background:0 0;border:none;font-size:18px;color:#aaa;margin:0 5px}.message-wrapper .message-preview-item .message-preview-action:active,.message-wrapper .message-preview-item .message-preview-action:focus,.message-wrapper .message-preview-item .message-preview-action:hover{color:#d9534f}.message-wrapper .message-preview-item .message-preview-actions{zoom:1;display:none;position:absolute;top:5px;right:10px}.message-wrapper .message-preview-item .message-preview-actions:after,.message-wrapper .message-preview-item .message-preview-actions:before{content:"";display:table}.message-wrapper .message-preview-item .message-preview-actions:after{clear:both}.message-wrapper .message-preview-item+.message-preview-item{border-top:none}.message-wrapper .message-preview-item .message-preview-info{padding-left:0}.message-wrapper .message-preview-item 
.message-preview-info .message-preview-info-time{font-size:10px;color:#666}.message-wrapper .message-preview-item .message-preview-body{padding:5px 15px;border-radius:7px;overflow:hidden;max-height:100%}.message-wrapper .message-preview-item .message-preview-body:after,.message-wrapper .message-preview-item .message-preview-body:before{content:" ";display:table}.message-wrapper .message-preview-item .message-preview-body:after{clear:both}.message-wrapper .message-preview-item .message-preview-body .hide-image{color:#fff;background:#67809f;font-size:12px;padding:3px 6px;border-radius:3px}.message-wrapper .message-preview-item.hot .message-preview-body{background:#c5d9e7;color:#2b5b83}.message-view-wrapper{margin-bottom:15px}.message-view-wrapper .message-view-date{color:#d9534f;font-size:12px;padding:5px 0}.message-view-wrapper .message-view-body{font-size:16px;color:#2b5b83}.compose{padding:10px}.compose .compose-thumb{float:left;width:12%;padding-right:15px}.compose .compose-textarea{float:left;width:88%} \ No newline at end of file diff --git a/controllers/MessageBackendController.php b/controllers/MessageBackendController.php index 14c7681..629e6bf 100644 --- a/controllers/MessageBackendController.php +++ b/controllers/MessageBackendController.php @@ -2,9 +2,9 @@ /** * Class MessageBackendController - backend message manipulation controller. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. * @package yupe.modules.message.controllers * @since 0.2α * diff --git a/controllers/MessageController.php b/controllers/MessageController.php index e710392..bb1fa17 100644 --- a/controllers/MessageController.php +++ b/controllers/MessageController.php @@ -1,10 +1,10 @@ - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. 
+ * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. * @package yupe.modules.message.controllers * @since 0.1α * @@ -12,8 +12,14 @@ class MessageController extends yupe\components\controllers\FrontController { + /** + * @var string + */ public $layout = '/layouts/message'; + /** + * @return array + */ public function filters() { return [ @@ -21,6 +27,9 @@ public function filters() ]; } + /** + * @return array + */ public function accessRules() { return [ @@ -30,6 +39,10 @@ public function accessRules() 'outbox', 'compose', 'view', + 'delete', + 'spam', + 'spamMark', + 'spamUnMark', ], 'users' => ['?'], ], @@ -39,20 +52,29 @@ public function accessRules() 'outbox', 'compose', 'view', + 'delete', + 'spam', + 'spamMark', + 'spamUnMark', ], 'users' => ['@'], ], ]; } + /** + * Входящие сообщения + */ public function actionInbox() { $dataProvider=new CActiveDataProvider('Message', [ 'criteria' => [ - 'condition' => 'recipient_id = :user_id', + 'condition' => 'recipient_id = :user_id AND recipient_del = :recipient_del AND is_spam = :is_spam', 'params' => [ - ':user_id' => Yii::app()->getUser()->getId() + ':user_id' => Yii::app()->getUser()->getId(), + ':recipient_del' => Message::NOT_DELETED, + ':is_spam' => Message::NOT_SPAM, ], 'order' => 'sent_at DESC', 'with' => ['sender'], @@ -68,14 +90,18 @@ public function actionInbox() ]); } + /** + * Исходящие сообщения + */ public function actionOutbox() { $dataProvider=new CActiveDataProvider('Message', [ 'criteria' => [ - 'condition' => 'sender_id = :user_id', + 'condition' => 'sender_id = :user_id AND sender_del = :sender_del', 'params' => [ - ':user_id' => Yii::app()->getUser()->getId() + ':user_id' => Yii::app()->getUser()->getId(), + ':sender_del' => Message::NOT_DELETED, ], 'order' => 'sent_at DESC', 'with' => ['recipient'], @@ -91,12 +117,19 @@ public function actionOutbox() ]); } + /** + * СОздание письма. 
+ */ public function actionCompose() { $model = new ComposeForm; + if(Yii::app()->request->getPost('ComposeForm')) { + $model->attributes = Yii::app()->request->getPost('ComposeForm'); + if($model->validate() && $model->send()) { + Yii::app()->getUser()->setFlash( yupe\widgets\YFlashMessages::SUCCESS_MESSAGE, Yii::t('MessageModule.message', 'Your message has been sent successfully') @@ -115,9 +148,16 @@ public function actionCompose() ]); } - public function actionView($mid) + /** + * Просмотр письма + * + * @param $id + * @throws CDbException + * @throws CHttpException + */ + public function actionView($id) { - $model = Message::model()->findByPk($mid); + $model = Message::model()->findByPk($id);// Добавить фильтрафию if(!$model || !$model->getHasAccess()) { throw new CHttpException(404, Yii::t('MessageModule.message', 'Such a message does not exist')); @@ -133,10 +173,147 @@ public function actionView($mid) ]); } - // TODO - public function actionRemove($mid) + /** + * Удаление письма. 
+ * Soft delete: only flags the message as deleted for the current user's side (recipient_del or sender_del). + * @param $id Primary key of the message + * @throws CDbException + * @throws CHttpException + */ + public function actionDelete($id) + { + $model = Message::model()->findByPk($id); + + if(!$model || !$model->getHasAccess()) { + throw new CHttpException(404, Yii::t('MessageModule.message', 'Such a message does not exist')); + } + + if($model->getIsInbox()){ + if($model->recipient_del == Message::DELETED) { // loose ==, like getIsSpam(): the loaded attribute may be a string + throw new CHttpException(404, Yii::t('MessageModule.message', 'This message already removed')); + } + else { + $model->recipient_del = Message::DELETED; + } + } + elseif($model->getIsOutbox()) { + if($model->sender_del == Message::DELETED) { // loose ==, like getIsSpam(): the loaded attribute may be a string + throw new CHttpException(404, Yii::t('MessageModule.message', 'This message already removed')); + } + else { + $model->sender_del = Message::DELETED; + } + } + if($model->update()) { + Yii::app()->getUser()->setFlash( + yupe\widgets\YFlashMessages::SUCCESS_MESSAGE, + Yii::t('MessageModule.message', 'Message has been removed successfully') + ); + } + else { + Yii::app()->getUser()->setFlash( + yupe\widgets\YFlashMessages::ERROR_MESSAGE, + Yii::t('MessageModule.message', 'When you remove an error occurred, please try again later') + ); + } + $this->redirect(Yii::app()->request->urlReferrer); + } + + /** + * Spam folder: lists the current user's non-deleted incoming messages flagged as spam, newest first. + */ + public function actionSpam() { + $dataProvider=new CActiveDataProvider('Message', [ + 'criteria' => [ + 'condition' => 'recipient_id = :user_id AND recipient_del = :recipient_del AND is_spam = :is_spam', + 'params' => [ + ':user_id' => Yii::app()->getUser()->getId(), + ':recipient_del' => Message::NOT_DELETED, + ':is_spam' => Message::SPAM, + ], + 'order' => 'sent_at DESC', + 'with' => ['sender'], + ], + + 'pagination' => [ + 'pageSize' => 20, + ], + ]); + + $this->render('spam', [ + 'dataProvider' => $dataProvider + ]); + } + /** + * + * Marks a message as spam.
+ * + * @param $id + * @throws CDbException + * @throws CHttpException + */ + public function actionSpamMark($id) + { + $model = Message::model()->findByPk($id); + + if(!$model || !$model->getHasAccess()) { + throw new CHttpException(404, Yii::t('MessageModule.message', 'Such a message does not exist')); + } + + if($model->getIsInbox() && !$model->getIsSpam()){ + $model->is_spam = Message::SPAM; + if($model->update()) { + Yii::app()->getUser()->setFlash( + yupe\widgets\YFlashMessages::SUCCESS_MESSAGE, + Yii::t('MessageModule.message', 'Message marked as spam successfully') + ); + } + else { + Yii::app()->getUser()->setFlash( + yupe\widgets\YFlashMessages::ERROR_MESSAGE, + Yii::t('MessageModule.message', 'When you marked message as spam an error occurred, please try again later') + ); + } + } + + $this->redirect(Yii::app()->request->urlReferrer); + } + + /** + * Пометка письма, как не спам. + * + * @param $id + * @throws CDbException + * @throws CHttpException + */ + public function actionSpamUnMark($id) + { + $model = Message::model()->findByPk($id); + + if(!$model || !$model->getHasAccess()) { + throw new CHttpException(404, Yii::t('MessageModule.message', 'Such a message does not exist')); + } + + if($model->getIsInbox() && $model->getIsSpam()){ + + $model->is_spam = Message::NOT_SPAM; + + if($model->update()) { + Yii::app()->getUser()->setFlash( + yupe\widgets\YFlashMessages::SUCCESS_MESSAGE, + Yii::t('MessageModule.message', 'Message marked as not spam successfully') + ); + } + else { + Yii::app()->getUser()->setFlash( + yupe\widgets\YFlashMessages::ERROR_MESSAGE, + Yii::t('MessageModule.message', 'When you marked message as not spam an error occurred, please try again later') + ); + } + } + $this->redirect(Yii::app()->request->urlReferrer); } } \ No newline at end of file diff --git a/forms/ComposeForm.php b/forms/ComposeForm.php index 9711ea2..c48f818 100644 --- a/forms/ComposeForm.php +++ b/forms/ComposeForm.php @@ -1,10 +1,10 @@ - * @link http://hoswac.ru - 
* @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. * @package yupe.modules.message.controllers * @since 0.2.0α * @@ -21,6 +21,7 @@ public function rules() ['recipient', 'required', 'message' => Yii::t('MessageModule.message', 'Recipient can not be empty')], ['body', 'required', 'message' => Yii::t('MessageModule.message', 'Message can not be empty')], ['body', 'length', 'min' => 1], + ['body', 'clean'], ]; } @@ -35,8 +36,36 @@ public function send() $message->is_read = Message::STATUS_NEW; $message->sender_del = Message::NOT_DELETED; $message->recipient_del = Message::NOT_DELETED; + $message->is_spam = Message::NOT_SPAM; return $message->save(false); } return false; } + + public function clean($attribute) + { + $options = [ + 'HTML.AllowedElements' => [ + 'a' + ], + 'URI.AllowedSchemes' => [ + 'http' => true, + 'https' => true + ], + //'URI.Munge' => true, + 'AutoFormat.Linkify' => true, + //'URI.HostBlacklist' => '', TODO: подключить API DR.WEB для чёрных ссылок + ]; + + $purifier = new CHtmlPurifier(); + $purifier->options = $options; + $this->$attribute = $purifier->purify($this->$attribute); + + if(Yii::app()->getModule('message')->censure) { + Yii::import('message.vendor.php-censure.Censure'); + $this->$attribute = Censure::parse($this->$attribute, '10', '', false, ':censored:'); + } + + return $this->$attribute; + } } \ No newline at end of file diff --git a/install/message.php b/install/message.php index 278689f..27f4ebc 100644 --- a/install/message.php +++ b/install/message.php @@ -2,9 +2,9 @@ /** * Module configuration file. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. 
* @package yupe.modules.message.install * @since 0.1α * @@ -18,7 +18,11 @@ 'rules'=>[ '/inbox' => 'message/message/inbox', '/outbox' => 'message/message/outbox', + '/spam' => 'message/message/spam', '/compose' => 'message/message/compose', - '/pm' => 'message/message/view', + '/pm' => 'message/message/view', + '/pm/remove' => 'message/message/delete', + '/pm/spam/mark' => 'message/message/spamMark', + '/pm/spam/unmark' => 'message/message/spamUnMark', ], ]; \ No newline at end of file diff --git a/install/migrations/m000000_000000_message_base.php b/install/migrations/m000000_000000_message_base.php index 8fb5c17..0668dd0 100644 --- a/install/migrations/m000000_000000_message_base.php +++ b/install/migrations/m000000_000000_message_base.php @@ -3,9 +3,9 @@ /** * Class m000000_000000_message_base - migrations class {{message_message}} table. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. * @package yupe.modules.message.install.migrations * @since 0.2.0α * @@ -32,6 +32,7 @@ public function safeUp() 'is_read' => 'integer NOT NULL', 'sender_del' => 'integer NOT NULL', 'recipient_del' => 'integer NOT NULL', + 'is_spam' => 'integer NOT NULL', ], $this->getOptions() ); diff --git a/install/migrations/m150219_1424325633_add_spam.php b/install/migrations/m150219_1424325633_add_spam.php new file mode 100644 index 0000000..629faff --- /dev/null +++ b/install/migrations/m150219_1424325633_add_spam.php @@ -0,0 +1,37 @@ + + * @link http://nativeweb.ru + * @copyright 2014 Native Web. 
+ * @package yupe.modules.message.install.migrations + * @since 0.2.0β + * + */ + +class m150219_1424325633_add_spam extends yupe\components\DbMigration +{ + /** + * Adds the indexed is_spam flag; DEFAULT 1 (the value of Message::NOT_SPAM) keeps already stored messages in the inbox. + * @return bool|void + * + */ + public function safeUp() + { + $this->addColumn('{{message_message}}', 'is_spam', 'integer NOT NULL DEFAULT 1'); + $this->createIndex("ix_{{message_message}}_is_spam", '{{message_message}}', "is_spam", false); + } + + /** + * Reverts safeUp(): drops the is_spam index, then the column. + * @return bool|void + * + */ + public function safeDown() + { + $this->dropIndex('ix_{{message_message}}_is_spam', '{{message_message}}'); + $this->dropColumn('{{message_message}}', 'is_spam'); + } +} \ No newline at end of file diff --git a/messages/ru/message.php b/messages/ru/message.php index 4da4b9f..3aae6ca 100644 --- a/messages/ru/message.php +++ b/messages/ru/message.php @@ -2,9 +2,9 @@ /** * message.php - Russian localization module file. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. * @package yupe.modules.message.messages.ru * @since 0.1α * @@ -65,5 +65,16 @@ 'Message has been changed' => 'Сообщение сохранено', 'Message has been removed' => 'Сообщение удалено', 'You can\'t make this changes' => 'Невозможно применить изменения', 'Bad request. Please don\'t use similar requests anymore' => 'Неверный запрос.
Пожалуйста, не используйте подобные запросы', + 'Spam' => 'Спам', + 'Is spam' => 'Это спам', + 'Is not spam' => 'Это не спам', + 'Delete' => 'Удалить', + 'This message already removed' => 'Это сообщение уже было удалено', + 'Message has been removed successfully' => 'Сообщение успешно удалено', + 'When you remove an error occurred, please try again later' => 'При удалении произошла ошибка, попробуйте повторить запрос позже', + 'Message marked as spam successfully' => 'Сообщение помечено как спам успешно', + 'When you marked message as spam an error occurred, please try again later' => 'При пометке сообщения как спам произошла ошибка, попробуйте повторить запрос позже', + 'Message marked as not spam successfully' => 'Сообщение помечено как не спам успешно', + 'When you marked message as not spam an error occurred, please try again later' => 'При пометке сообщения как не спам произошла ошибка, попробуйте повторить запрос позже', ]; \ No newline at end of file diff --git a/models/Message.php b/models/Message.php index b575a13..193e507 100644 --- a/models/Message.php +++ b/models/Message.php @@ -2,9 +2,9 @@ /** * Class Message - the model {{message_message}} table. * - * @author BrusSENS (Dmitry Brusenskiy) - * @link http://hoswac.ru - * @copyright 2014 Hoswac ltd. + * @author BrusSENS (Dmitry Brusenskiy) + * @link http://nativeweb.ru + * @copyright 2014 Native Web. 
* @package yupe.modules.message.models * @since 0.1α * @@ -18,10 +18,15 @@ class Message extends yupe\models\YModel { // status not read const STATUS_NEW = 2; - const NOT_DELETED = 2; + const NOT_DELETED = 1; const DELETED = 2; + const NOT_SPAM = 1; + + const SPAM = 2; + + /** * @param null|string $className * @return $this @@ -39,6 +44,9 @@ public function tableName() return '{{message_message}}'; } + /** + * @return array + */ public function attributeLabels() { return[ @@ -51,6 +59,9 @@ public function attributeLabels() ]; } + /** + * @return array + */ public function rules() { return [ @@ -71,16 +82,38 @@ public function relations() ]; } + /** + * Сообщение входящее + * + * @return bool + */ public function getIsInbox() { return Yii::app()->user->id === $this->recipient_id; } + /** + * Сообщение исходящее. + * + * @return bool + */ + public function getIsOutbox() + { + return Yii::app()->user->id === $this->sender_id; + } + + /** + * @return bool + */ public function getIsNew() { return $this->is_read == self::STATUS_NEW; } + /** + * Проверка доступа к сообщению. + * @return bool + */ public function getHasAccess() { if($this->sender_id === Yii::app()->user->id || $this->recipient_id === Yii::app()->user->id) { @@ -89,6 +122,11 @@ public function getHasAccess() return false; } + /** + * Список статусов. + * + * @return array + */ public function getStatusList() { return [ @@ -97,11 +135,29 @@ public function getStatusList() ]; } + /** + * Получение статуса сообщения + * + * @return mixed + */ public function getStatus() { return $this->getStatusList()[$this->is_read]; } + /** + * Проверка сообщения на статус спама. 
+ * @return bool + */ + public function getIsSpam() + { + return $this->is_spam == self::SPAM; + } + + /** + * @param int $pageSize + * @return CActiveDataProvider + */ public function search($pageSize = 10) { $criteria = new CDbCriteria(); diff --git a/vendor/php-censure/Censure.php b/vendor/php-censure/Censure.php new file mode 100644 index 0000000..5ff513b --- /dev/null +++ b/vendor/php-censure/Censure.php @@ -0,0 +1,359 @@ += 5.2.0 + * @param string $charset кодировка символов (родная кодировка -- UTF-8, для других будет прозрачное перекодирование) + * @return bool|string|int|null Если $replace === NULL, то возвращает FALSE, если мат не обнаружен, иначе фрагмент текста с матерным словом. + * Если $replace !== NULL, то возвращает исходную строку, где фрагменты мата заменены на $replace. + * В случае возникновения ошибки возвращает код ошибки > 0 (integer): + * * PREG_INTERNAL_ERROR + * * PREG_BACKTRACK_LIMIT_ERROR (see also pcre.backtrack_limit) + * * PREG_RECURSION_LIMIT_ERROR (see also pcre.recursion_limit) + * * PREG_BAD_UTF8_ERROR + * * PREG_BAD_UTF8_OFFSET_ERROR (since PHP 5.3.0) + * Или -1, если ReflectionTypeHint вернул ошибку + */ + public static function parse( + $s, + $delta = 3, + $continue = "\xe2\x80\xa6", + $is_html = true, + $replace = null, + $charset = 'UTF-8') + { + if (!ReflectionTypeHint::isValid()) return -1; + if ($s === null) return null; + + static $re_badwords = null; + + if ($re_badwords === null) + { + #предлоги русского языка: + #[всуо]| + #по|за|на|об|до|от|вы|вс|вз|из|ис| + #под|про|при|над|низ|раз|рас|воз|вос| + #пооб|повы|пона|поза|недо|пере|одно| + #полуза|произ|пораз|много| + $pretext = array( + #1 + '[уyоoаa]_? (?=[еёeхx])', #у, о (уебать, охуеть, ахуеть) + '[вvbсc]_? (?=[хпбмгжxpmgj])', #в, с (впиздячить, схуярить) + '[вvbсc]_?[ъь]_? (?=[еёe])', #въ, съ (съебаться, въебать) + 'ё_? (?=[бb6])', #ё (ёбля) + #2 + '[вvb]_?[ыi]_?', #вы + '[зz3]_?[аa]_?', #за + '[нnh]_?[аaеeиi]_?', #на, не, ни + '[вvb]_?[сc]_? 
(?=[хпбмгжxpmgj])', #вс (вспизднуть) + '[оo]_?[тtбb6]_? (?=[хпбмгжxpmgj])', #от, об + '[оo]_?[тtбb6]_?[ъь]_? (?=[еёe])', #отъ, объ + '[иiвvb]_?[зz3]_? (?=[хпбмгжxpmgj])', #[ив]з + '[иiвvb]_?[зz3]_?[ъь]_? (?=[еёe])', #[ив]зъ + '[иi]_?[сc]_? (?=[хпбмгжxpmgj])', #ис + '[пpдdg]_?[оo]_? (?> [бb6]_? (?=[хпбмгжxpmgj]) + | [бb6]_? [ъь]_? (?=[еёe]) + | [зz3]_? [аa] _? + )?', #по, до, пообъ, дообъ, поза, доза (двойные символы вырезаются!) + #3 + '[пp]_?[рr]_?[оoиi]_?', #пр[ои] + '[зz3]_?[лl]_?[оo]_?', #зло (злоебучая) + '[нnh]_?[аa]_?[дdg]_? (?=[хпбмгжxpmgj])', #над + '[нnh]_?[аa]_?[дdg]_?[ъь]_? (?=[еёe])', #надъ + '[пp]_?[оoаa]_?[дdg]_? (?=[хпбмгжxpmgj])', #под + '[пp]_?[оoаa]_?[дdg]_?[ъь]_? (?=[еёe])', #подъ + '[рr]_?[аa]_?[зz3сc]_? (?=[хпбмгжxpmgj])', #ра[зс] + '[рr]_?[аa]_?[зz3сc]_?[ъь]_? (?=[еёe])', #ра[зс]ъ + '[вvb]_?[оo]_?[зz3сc]_? (?=[хпбмгжxpmgj])', #во[зс] + '[вvb]_?[оo]_?[зz3сc]_?[ъь]_? (?=[еёe])', #во[зс]ъ + #4 + '[нnh]_?[еe]_?[дdg]_?[оo]_?', #недо + '[пp]_?[еe]_?[рr]_?[еe]_?', #пере + '[oо]_?[дdg]_?[нnh]_?[оo]_?', #одно + '[кk]_?[oо]_?[нnh]_?[оo]_?', #коно (коноебиться) + '[мm]_?[уy]_?[дdg]_?[oоaа]_?', #муд[оа] (мудаёб) + '[oо]_?[сc]_?[тt]_?[оo]_?', #осто (остопиздело) + '[дdg]_?[уy]_?[рpr]_?[оoаa]_?', #дур[оа] + '[хx]_?[уy]_?[дdg]_?[оoаa]_?', #худ[оа] (худоебина) + #5 + '[мm]_?[нnh]_?[оo]_?[гg]_?[оo]_?', #много + '[мm]_?[оo]_?[рpr]_?[дdg]_?[оoаa]_?', #морд[оа] + '[мm]_?[оo]_?[зz3]_?[гg]_?[оoаa]_?', #мозг[оа] + '[дdg]_?[оo]_?[лl]_?[бb6]_?[оoаa]_?', #долб[оа] + '[оo]_?[сc]_?[тt]_?[рpr]_?[оo]_?', #остро + ); + + $badwords = array( + #Слово на букву Х + '(?<=\PL) %RE_PRETEXT%? + [hхx]_?[уyu]_?[ийiеeёяюju] #хуй, хуя, хую, хуем, хуёвый, охуительный + #исключения: + (? '\x20', #пробел + '\pL' => '[^\x20\d]', #буква + '\PL' => '[\x20\d]', #не буква + '[:vowel:]' => '[аеиоуыэюяёaeioyu]', #гласные буквы + '[:consonant:]' => '[^аеиоуыэюяёaeioyu\x20\d]', #согласные буквы + ); + + $re_badwords = str_replace( + '%RE_PRETEXT%', + '(?:' . implode('|', $pretext) . 
')', #однократный шаблон с альтернативами использовать нельзя! + '~' . implode('|', $badwords) . '~sxuSX' + ); + $re_badwords = strtr($re_badwords, $trans); + } + + $s = UTF8::convert_from($s, $charset); + $replace = UTF8::convert_from($replace, $charset); + + $ss = $s; #saves original string + + if ($is_html) + { + #скрипты не вырезаем, т.к. м.б. обходной маневр на с кодом на javascript: + # + #хотя давать пользователю возможность использовать код на javascript нехорошо + $s = is_callable(array('HTML', 'strip_tags')) ? HTML::strip_tags($s, null, true, array('comment', 'style', 'map', 'frameset', 'object', 'applet')) + : strip_tags($s); + #заменяем html-сущности в "чистый" UTF-8 + $s = UTF8::html_entity_decode($s, $is_htmlspecialchars = true); + } + + if (strtoupper(substr($charset, 0, 3)) === 'UTF') #UTF-8, UTF-16, UTF-32 + { + #remove combining diactrical marks + $additional_chars = array( + "\xc2\xad", #"мягкие" переносы строк (­) + ); + $s = UTF8::diactrical_remove($s, $additional_chars); + } + + #ВотБ/\яПидорыОхуелиБлятьНахуйПохуйПи3децПолный + if (version_compare(PHP_VERSION, '5.2.0', '>=')) + { + $s = preg_replace('~ [\p{Lu}3] (?>\p{Ll}+|/\\\\|[@36]+)++ #Вот + (?= [\p{Lu}3] (?:\p{Ll} |/\\\\|[@36] ) ) #Бля + ~sxuSX', '$0 ', $s); + } + + $s = UTF8::lowercase($s); + + #получаем в массив только буквы и цифры + #"с_л@о#во,с\xc2\xa7лово.Слово" -> "с л о во с лово слово слово слово слово" + preg_match_all('~(?> \xd0[\xb0-\xbf]|\xd1[\x80-\x8f\x91] #[а-я] + | /\\\\ #л + | @ #а + | [a-z\d]+ + )+ + ~sxSX', $s, $m); + $s = ' ' . implode(' ', $m[0]) . 
' '; + + $trans = array( + '/\\' => 'л', #Б/\ЯТЬ --> БЛЯТЬ + '@' => 'а', #пизд@ --> пизда + ); + $s = strtr($s, $trans); + + #цифровые подделки под буквы + $trans = array( + '~ [3з]++ [3з\x20]*+ ~sxuSX' => 'з', + '~ [6б]++ [6б\x20]*+ ~sxuSX' => 'б', + ); + $s = preg_replace(array_keys($trans), array_values($trans), $s); + + #убираем все повторяющиеся символы, ловим обман типа "х-у-у-й" + #"сллоооовоо слово х у у й" --> "слово слово х у й" + $s = preg_replace('/( [\xd0\xd1][\x80-\xbf] \x20? #optimized [а-я] + | [a-z\d] \x20? + ) \\1+ + /sxSX', '$1', $s); + + if ($replace === null || version_compare(PHP_VERSION, '5.2.0', '<')) + { + $result = preg_match($re_badwords, $s, $m, PREG_OFFSET_CAPTURE); + if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return preg_last_error(); + if ($result === false) return 1; #PREG_INTERNAL_ERROR = 1 + if ($result && $replace === null) + { + list($word, $offset) = $m[0]; + $s1 = substr($s, 0, $offset); + $s2 = substr($s, $offset + strlen($word)); + $delta = intval($delta); + if ($delta === 0) $fragment = '[' . trim($word) . ']'; + else + { + if ($delta < 1 || $delta > 10) $delta = 3; + preg_match('/ (?> \x20 (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)++ ){1,' . $delta . '}+ + \x20?+ + $/sxSX', $s1, $m1); + preg_match('/^ (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)*+ #ending + \x20?+ + (?> (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)++ \x20 ){0,' . $delta . '}+ + /sxSX', $s2, $m2); + $fragment = (ltrim(@$m1[0]) !== ltrim($s1) ? $continue : '') . + trim((isset($m1[0]) ? $m1[0] : '') . '[' . trim($word) . ']' . (isset($m2[0]) ? $m2[0] : '')) . + (rtrim(@$m2[0]) !== rtrim($s2) ? 
$continue : ''); + } + return UTF8::convert_to($fragment, $charset); + } + return false; + } + + $result = preg_match_all($re_badwords, $s, $m); + if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return preg_last_error(); + if ($result === false) return 1; #PREG_INTERNAL_ERROR = 1 + if ($result > 0) + { + #d($s, $m[0]); + $s = $ss; + #замена матного фрагмента на $replace + foreach ($m[0] as $w) + { + $re_w = '~' . preg_replace_callback('~(?:/\\\\|[^\x20])~suSX', array('self', '_make_regexp_callback'), $w) . '~sxuiSX'; + $ss = preg_replace($re_w, $replace, $ss); + #d($re_w); + } + while ($ss !== $s) $ss = self::parse($s = $ss, $delta, $continue, $is_html, $replace, 'UTF-8'); + } + return UTF8::convert_to($ss, $charset); + } + + private static function _make_regexp_callback(array $m) + { + #$re_holes = '[\x00-\x20\-_\*\~\.\'"\^=`:]'; + #$re_holes = '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]'; + $re_holes = '(?!/\\\\)[^\p{L}\d]'; #non letter, non digit, non '/\' + if ($m[0] === 'а') $re = '[@аА]++ (?>[:holes:]|[@аА]+)*+'; + elseif ($m[0] === 'з') $re = '[3зЗ]++ (?>[:holes:]|[3зЗ]+)*+'; + elseif ($m[0] === 'б') $re = '[6бБ]++ (?>[:holes:]|[6бБ]+)*+'; + elseif ($m[0] === 'л') $re = '(?>[лЛ]+|/\\\\)++ (?>[:holes:]|[лЛ]+|/\\\\)*+'; + else + { + #в PCRE-7.2 флаг /i в комбинации с /u в регулярном выражении почему-то не работает (BUG?) + #поэтому делаем класс символов с буквами в обоих регистрах + $char = '[' . preg_quote($m[0] . UTF8::uppercase($m[0]), '~') . ']'; + $re = str_replace('$0', $char, '$0++ (?>[:holes:]|$0+)*+'); + } + return str_replace('[:holes:]', $re_holes, $re . 
"\r\n"); + } +} \ No newline at end of file diff --git a/vendor/php-censure/ReflectionTypeHint.php b/vendor/php-censure/ReflectionTypeHint.php new file mode 100644 index 0000000..3f047dc --- /dev/null +++ b/vendor/php-censure/ReflectionTypeHint.php @@ -0,0 +1,184 @@ + 'is_int', + 'integer' => 'is_int', + 'digit' => 'ctype_digit', + 'number' => 'ctype_digit', + 'float' => 'is_float', + 'double' => 'is_float', + 'real' => 'is_float', + 'numeric' => 'is_numeric', + 'str' => 'is_string', + 'string' => 'is_string', + 'char' => 'is_string', + 'bool' => 'is_bool', + 'boolean' => 'is_bool', + 'null' => 'is_null', + 'array' => 'is_array', + 'obj' => 'is_object', + 'object' => 'is_object', + 'res' => 'is_resource', + 'resource' => 'is_resource', + 'scalar' => 'is_scalar', #integer, float, string or boolean + 'cb' => 'is_callable', + 'callback' => 'is_callable', + ); + + #calling the methods of this class only statically! + private function __construct() {} + + public static function isValid() + { + if (! assert_options(ASSERT_ACTIVE)) return true; + $bt = self::debugBacktrace(null, 1); + extract($bt); //to $file, $line, $function, $class, $object, $type, $args + if (! $args) return true; #speed improve + $r = new ReflectionMethod($class, $function); + $doc = $r->getDocComment(); + $cache_id = $class. $type. $function; + preg_match_all('~ [\r\n]++ [\x20\t]++ \* [\x20\t]++ + @param + [\x20\t]++ + \K #memory reduce + ( [_a-z]++[_a-z\d]*+ + (?>[|/,][_a-z]+[_a-z\d]*)*+ + ) #1 types + [\x20\t]++ + &?+\$([_a-z]++[_a-z\d]*+) #2 name + ~sixSX', $doc, $params, PREG_SET_ORDER); + $parameters = $r->getParameters(); + //d($args, $params, $parameters); + if (count($parameters) > count($params)) + { + $message = 'phpDoc %d piece(s) @param description expected in %s%s%s(), %s given, ' . PHP_EOL + . 'called in %s on line %d ' . PHP_EOL + . 
'and defined in %s on line %d'; + $message = sprintf($message, count($parameters), $class, $type, $function, count($params), $file, $line, $r->getFileName(), $r->getStartLine()); + trigger_error($message, E_USER_NOTICE); + } + foreach ($args as $i => $value) + { + if (! isset($params[$i])) return true; + if ($parameters[$i]->name !== $params[$i][2]) + { + $param_num = $i + 1; + $message = 'phpDoc @param %d in %s%s%s() must be named as $%s, $%s given, ' . PHP_EOL + . 'called in %s on line %d ' . PHP_EOL + . 'and defined in %s on line %d'; + $message = sprintf($message, $param_num, $class, $type, $function, $parameters[$i]->name, $params[$i][2], $file, $line, $r->getFileName(), $r->getStartLine()); + trigger_error($message, E_USER_NOTICE); + } + + $hints = preg_split('~[|/,]~sSX', $params[$i][1]); + if (! self::checkValueTypes($hints, $value)) + { + $param_num = $i + 1; + $message = 'Argument %d passed to %s%s%s() must be an %s, %s given, ' . PHP_EOL + . 'called in %s on line %d ' . PHP_EOL + . 'and defined in %s on line %d'; + $message = sprintf($message, $param_num, $class, $type, $function, implode('|', $hints), (is_object($value) ? get_class($value) . ' ' : '') . gettype($value), $file, $line, $r->getFileName(), $r->getStartLine()); + trigger_error($message, E_USER_WARNING); + return false; + } + } + return true; + } + + /** + * Return stacktrace. Correctly work with call_user_func*() + * (totally skip them correcting caller references). + * If $return_frame is present, return only $return_frame matched caller, not all stacktrace. + * + * @param string|null $re_ignore example: '~^' . preg_quote(__CLASS__, '~') . '(?![a-zA-Z\d])~sSX' + * @param int|null $return_frame + * @return array + */ + public static function debugBacktrace($re_ignore = null, $return_frame = null) + { + $trace = debug_backtrace(); + + $a = array(); + $frames = 0; + for ($i = 0, $n = count($trace); $i < $n; $i++) + { + $t = $trace[$i]; + if (! $t) continue; + + // Next frame. 
+ $next = isset($trace[$i+1])? $trace[$i+1] : null; + + // Dummy frame before call_user_func*() frames. + if (! isset($t['file']) && $next) + { + $t['over_function'] = $trace[$i+1]['function']; + $t = $t + $trace[$i+1]; + $trace[$i+1] = null; // skip call_user_func on next iteration + } + + // Skip myself frame. + if (++$frames < 2) continue; + + // 'class' and 'function' field of next frame define where this frame function situated. + // Skip frames for functions situated in ignored places. + if ($re_ignore && $next) + { + // Name of function "inside which" frame was generated. + $frame_caller = (isset($next['class']) ? $next['class'] . $next['type'] : '') + . (isset($next['function']) ? $next['function'] : ''); + if (preg_match($re_ignore, $frame_caller)) continue; + } + + // On each iteration we consider ability to add PREVIOUS frame to $a stack. + if (count($a) === $return_frame) return $t; + $a[] = $t; + } + return $a; + } + + /** + * Checks a value to the allowed types + * + * @param array $types + * @param mixed $value + * @return bool + */ + public static function checkValueTypes(array $types, $value) + { + foreach ($types as $type) + { + $type = strtolower($type); + if (array_key_exists($type, self::$hints) && call_user_func(self::$hints[$type], $value)) return true; + if (is_object($value) && @is_a($value, $type)) return true; + if ($type === 'mixed') return true; + } + return false; + } +} \ No newline at end of file diff --git a/vendor/php-censure/ReflectionTypeHint_example.php b/vendor/php-censure/ReflectionTypeHint_example.php new file mode 100644 index 0000000..df88fcc --- /dev/null +++ b/vendor/php-censure/ReflectionTypeHint_example.php @@ -0,0 +1,23 @@ +myMethod('sss', 75467, new Exception(), true); diff --git a/vendor/php-censure/UTF8-CHANGELOG.txt b/vendor/php-censure/UTF8-CHANGELOG.txt new file mode 100644 index 0000000..502a615 --- /dev/null +++ b/vendor/php-censure/UTF8-CHANGELOG.txt @@ -0,0 +1,65 @@ +2.2.2 / 2011-06-24 + + * Convert case 
functions improved: from all russian charsets to UTF8 native support was added + * UTF8::stripos() speed improved + * constant REPLACEMENT_CHAR added + +2.2.1 / 2011-06-08 + + * UTF8::preg_quote_case_insensitive() added + * UTF8::stripos() speed improved + +2.2.0 / 2011-06-06 + + * UTF8::strlen(), UTF8::substr(), UTF8::strpos(), + UTF8::html_entity_encode(), UTF8::html_entity_decode(), + UTF8::convert_case(), UTF8::lowercase(), UTF8::uppercase() speed improved + * UTF8::stripos(), UTF8::to_unicode(), UTF8::from_unicode() added + * UTF8::strtolower(), UTF8::strtoupper() as wrapper to UTF8::convert_case() added + * Unicode character database to 6.0.0 (2010-06-04) updated + * UTF8::$convert_case_table improved + +2.1.3 / 2011-05-31 + + * UTF8::truncate() small bug fixed + +2.1.2 / 2011-03-25 + + * Класс требует PHP-5.3.x + * UTF8::$char_re deprecated + * Добавлен метод UTF8::tests(), который тестирует методы класса на правильность работы + * Добавлены методы UTF8::strcmp(), UTF8::strncmp(), UTF8::strcasecmp() + * UTF8::is_utf8(), UTF8::str_limit(), UTF8::str_split() speed improved + * Добавлен 2-й параметр в UTF8::html_entity_encode() + * Добавлен 3-й параметр в UTF8::ucwords() + * Методы UTF8::convert_case(), UTF8::lowercase(), UTF8::uppercase() могут принимать массив в 1-м параметре + * Мелкие улучшения в UTF8::strtr() + * Модернизирован класс ReflectionTypeHint + +2.1.1 / 2010-07-19 + + * Добавлены методы array_change_key_case(), range(), strtr() + * Улучшен метод convert_files_from() + * Unicode Character Database 5.2.0 + * Исправлены ошибки в trim(), ltrim(), rtrim(), str_pad(), которые могут возникать в некоторых случаях + +2.1.0 / 2010-03-26 + + * Удалён метод unescape_recursive() + * Добавлен метод convert_files_from() + * Несколько методов теперь могут принимать массив и делать их обход рекурсивно + * Почти все методы для обработки строк могут принимать и возвращать NULL + +2.0.2 / 2010-02-13 + + * Новые методы is_ascii(), ltrim(), rtrim(), trim(), str_pad(), 
strspn() + * Исправлена небольшая ошибка в str_limit() + * Исправлена ошибка в методах convert_from() и convert_to(): они ошибочно возвращали FALSE, + если подать на вход массив, содержащий элементы типа boolean со значением FALSE + +2.0.1 / 2010-02-08 + + * Удалён метод convert_from_cp1259(), используйте convert_from('cp1251') + * Метод convert_from_utf16() теперь приватный, используйте convert_from('UTF-16') + * Добавлены методы convert_to(), diactrical_remove(), diactrical_restore() + * Другие мелкие исправления diff --git a/vendor/php-censure/UTF8.php b/vendor/php-censure/UTF8.php new file mode 100644 index 0000000..3ac1eea --- /dev/null +++ b/vendor/php-censure/UTF8.php @@ -0,0 +1,4072 @@ + = 5.3.x + * + * In Russian: + * + * Поддержка UTF-8 в PHP 5. + * + * Возможности и преимущества использования этого класса + * * Совместимость с интерфейсом стандартных PHP функций, работающих с однобайтовыми кодировками + * * Возможность работы без PHP расширений ICONV и MBSTRING, если они есть, то активно используются! 
+ * * Полезные функции, отсутствующие в ICONV и MBSTRING + * * Методы, которые принимают и возвращают строку, умеют принимать и возвращать null (удобно при выборках значений из базы данных) + * * Несколько методов умеют обрабатывать массивы рекурсивно + * * Единый интерфейс и инкапсуляция (можно унаследоваться и переопределить методы) + * * Высокая производительность, надёжность и качественный код + * * PHP >= 5.3.x + * + * Example: + * $s = 'Hello, Привет'; + * if (UTF8::is_utf8($s)) echo UTF8::strlen($s); + * + * UTF-8 encoding scheme: + * 2^7 0x00000000 — 0x0000007F 0xxxxxxx + * 2^11 0x00000080 — 0x000007FF 110xxxxx 10xxxxxx + * 2^16 0x00000800 — 0x0000FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 2^21 0x00010000 — 0x001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * 1-4 bytes length: 2^7 + 2^11 + 2^16 + 2^21 = 2 164 864 + * + * If I was a owner of the world, I would leave only 2 encoding: UTF-8 and UTF-32 ;-) + * + * Useful links + * http://ru.wikipedia.org/wiki/UTF8 + * http://www.madore.org/~david/misc/unitest/ A Unicode Test Page + * http://www.unicode.org/ + * http://www.unicode.org/reports/ + * http://www.unicode.org/reports/tr10/ Unicode Collation Algorithm + * http://www.unicode.org/Public/UCA/6.0.0/ Unicode Collation Algorithm + * http://www.unicode.org/reports/tr6/ A Standard Compression Scheme for Unicode + * http://www.fileformat.info/info/unicode/char/search.htm Unicode Character Search + * + * @link http://code.google.com/p/php5-utf8/ + * @license http://creativecommons.org/licenses/by-sa/3.0/ + * @author Nasibullin Rinat + * @version 2.2.2 + */ +class UTF8 +{ + #REPLACEMENT CHARACTER (for broken char) + const REPLACEMENT_CHAR = "\xEF\xBF\xBD"; #U+FFFD + + /** + * Regular expression for a character in UTF-8 without the use of a flag /u + * @deprecated Instead, use a dot (".") and the flag /u, it works faster! 
+ * @var string + */ + public static $char_re = ' [\x09\x0A\x0D\x20-\x7E] # ASCII strict + # [\x00-\x7F] # ASCII non-strict (including control chars) + | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte + | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte + | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates + | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 + | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 + | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 + '; + + /** + * Combining diactrical marks (Unicode 5.1). + * + * For example, russian letters in composed form: "Ё" (U+0401), "Й" (U+0419), + * decomposed form: (U+0415 U+0308), (U+0418 U+0306) + * + * @link http://www.unicode.org/charts/PDF/U0300.pdf + * @link http://www.unicode.org/charts/PDF/U1DC0.pdf + * @link http://www.unicode.org/charts/PDF/UFE20.pdf + * @var string + */ + #public static $diactrical_re = '\p{M}'; #alternative, but only with /u flag + public static $diactrical_re = ' \xcc[\x80-\xb9]|\xcd[\x80-\xaf] #UNICODE range: U+0300 — U+036F (for letters) + | \xe2\x83[\x90-\xbf] #UNICODE range: U+20D0 — U+20FF (for symbols) + | \xe1\xb7[\x80-\xbf] #UNICODE range: U+1DC0 — U+1DFF (supplement) + | \xef\xb8[\xa0-\xaf] #UNICODE range: U+FE20 — U+FE2F (combining half marks) + '; + + /** + * @var array + */ + public static $html_special_chars_table = array( + '"' => "\x22", #U+0022 ["] " quotation mark = APL quote + '&' => "\x26", #U+0026 [&] & ampersand + '<' => "\x3c", #U+003C [<] < less-than sign + '>' => "\x3e", #U+003E [>] > greater-than sign + ); + + /** + * @link http://www.fileformat.info/format/w3c/entitytest.htm?sort=Unicode%20Character HTML Entity Browser Test Page + * @var array + */ + public static $html_entity_table = array( + #Latin-1 Entities: + ' ' => "\xc2\xa0", #U+00A0 [ ] no-break space = non-breaking space + '¡' => "\xc2\xa1", #U+00A1 [¡] inverted exclamation mark + '¢' => "\xc2\xa2", #U+00A2 [¢] cent sign + '£' => "\xc2\xa3", #U+00A3 [£] pound sign 
+ '¤' => "\xc2\xa4", #U+00A4 [¤] currency sign + '¥' => "\xc2\xa5", #U+00A5 [¥] yen sign = yuan sign + '¦' => "\xc2\xa6", #U+00A6 [¦] broken bar = broken vertical bar + '§' => "\xc2\xa7", #U+00A7 [§] section sign + '¨' => "\xc2\xa8", #U+00A8 [¨] diaeresis = spacing diaeresis + '©' => "\xc2\xa9", #U+00A9 [©] copyright sign + 'ª' => "\xc2\xaa", #U+00AA [ª] feminine ordinal indicator + '«' => "\xc2\xab", #U+00AB [«] left-pointing double angle quotation mark = left pointing guillemet + '¬' => "\xc2\xac", #U+00AC [¬] not sign + '­' => "\xc2\xad", #U+00AD [ ] soft hyphen = discretionary hyphen + '®' => "\xc2\xae", #U+00AE [®] registered sign = registered trade mark sign + '¯' => "\xc2\xaf", #U+00AF [¯] macron = spacing macron = overline = APL overbar + '°' => "\xc2\xb0", #U+00B0 [°] degree sign + '±' => "\xc2\xb1", #U+00B1 [±] plus-minus sign = plus-or-minus sign + '²' => "\xc2\xb2", #U+00B2 [²] superscript two = superscript digit two = squared + '³' => "\xc2\xb3", #U+00B3 [³] superscript three = superscript digit three = cubed + '´' => "\xc2\xb4", #U+00B4 [´] acute accent = spacing acute + 'µ' => "\xc2\xb5", #U+00B5 [µ] micro sign + '¶' => "\xc2\xb6", #U+00B6 [¶] pilcrow sign = paragraph sign + '·' => "\xc2\xb7", #U+00B7 [·] middle dot = Georgian comma = Greek middle dot + '¸' => "\xc2\xb8", #U+00B8 [¸] cedilla = spacing cedilla + '¹' => "\xc2\xb9", #U+00B9 [¹] superscript one = superscript digit one + 'º' => "\xc2\xba", #U+00BA [º] masculine ordinal indicator + '»' => "\xc2\xbb", #U+00BB [»] right-pointing double angle quotation mark = right pointing guillemet + '¼' => "\xc2\xbc", #U+00BC [¼] vulgar fraction one quarter = fraction one quarter + '½' => "\xc2\xbd", #U+00BD [½] vulgar fraction one half = fraction one half + '¾' => "\xc2\xbe", #U+00BE [¾] vulgar fraction three quarters = fraction three quarters + '¿' => "\xc2\xbf", #U+00BF [¿] inverted question mark = turned question mark + #Latin capital letter + 'À' => "\xc3\x80", #Latin capital letter A with grave = 
Latin capital letter A grave + 'Á' => "\xc3\x81", #Latin capital letter A with acute + 'Â' => "\xc3\x82", #Latin capital letter A with circumflex + 'Ã' => "\xc3\x83", #Latin capital letter A with tilde + 'Ä' => "\xc3\x84", #Latin capital letter A with diaeresis + 'Å' => "\xc3\x85", #Latin capital letter A with ring above = Latin capital letter A ring + 'Æ' => "\xc3\x86", #Latin capital letter AE = Latin capital ligature AE + 'Ç' => "\xc3\x87", #Latin capital letter C with cedilla + 'È' => "\xc3\x88", #Latin capital letter E with grave + 'É' => "\xc3\x89", #Latin capital letter E with acute + 'Ê' => "\xc3\x8a", #Latin capital letter E with circumflex + 'Ë' => "\xc3\x8b", #Latin capital letter E with diaeresis + 'Ì' => "\xc3\x8c", #Latin capital letter I with grave + 'Í' => "\xc3\x8d", #Latin capital letter I with acute + 'Î' => "\xc3\x8e", #Latin capital letter I with circumflex + 'Ï' => "\xc3\x8f", #Latin capital letter I with diaeresis + 'Ð' => "\xc3\x90", #Latin capital letter ETH + 'Ñ' => "\xc3\x91", #Latin capital letter N with tilde + 'Ò' => "\xc3\x92", #Latin capital letter O with grave + 'Ó' => "\xc3\x93", #Latin capital letter O with acute + 'Ô' => "\xc3\x94", #Latin capital letter O with circumflex + 'Õ' => "\xc3\x95", #Latin capital letter O with tilde + 'Ö' => "\xc3\x96", #Latin capital letter O with diaeresis + '×' => "\xc3\x97", #U+00D7 [×] multiplication sign + 'Ø' => "\xc3\x98", #Latin capital letter O with stroke = Latin capital letter O slash + 'Ù' => "\xc3\x99", #Latin capital letter U with grave + 'Ú' => "\xc3\x9a", #Latin capital letter U with acute + 'Û' => "\xc3\x9b", #Latin capital letter U with circumflex + 'Ü' => "\xc3\x9c", #Latin capital letter U with diaeresis + 'Ý' => "\xc3\x9d", #Latin capital letter Y with acute + 'Þ' => "\xc3\x9e", #Latin capital letter THORN + #Latin small letter + 'ß' => "\xc3\x9f", #Latin small letter sharp s = ess-zed + 'à' => "\xc3\xa0", #Latin small letter a with grave = Latin small letter a grave + 'á' => 
"\xc3\xa1", #Latin small letter a with acute + 'â' => "\xc3\xa2", #Latin small letter a with circumflex + 'ã' => "\xc3\xa3", #Latin small letter a with tilde + 'ä' => "\xc3\xa4", #Latin small letter a with diaeresis + 'å' => "\xc3\xa5", #Latin small letter a with ring above = Latin small letter a ring + 'æ' => "\xc3\xa6", #Latin small letter ae = Latin small ligature ae + 'ç' => "\xc3\xa7", #Latin small letter c with cedilla + 'è' => "\xc3\xa8", #Latin small letter e with grave + 'é' => "\xc3\xa9", #Latin small letter e with acute + 'ê' => "\xc3\xaa", #Latin small letter e with circumflex + 'ë' => "\xc3\xab", #Latin small letter e with diaeresis + 'ì' => "\xc3\xac", #Latin small letter i with grave + 'í' => "\xc3\xad", #Latin small letter i with acute + 'î' => "\xc3\xae", #Latin small letter i with circumflex + 'ï' => "\xc3\xaf", #Latin small letter i with diaeresis + 'ð' => "\xc3\xb0", #Latin small letter eth + 'ñ' => "\xc3\xb1", #Latin small letter n with tilde + 'ò' => "\xc3\xb2", #Latin small letter o with grave + 'ó' => "\xc3\xb3", #Latin small letter o with acute + 'ô' => "\xc3\xb4", #Latin small letter o with circumflex + 'õ' => "\xc3\xb5", #Latin small letter o with tilde + 'ö' => "\xc3\xb6", #Latin small letter o with diaeresis + '÷' => "\xc3\xb7", #U+00F7 [÷] division sign + 'ø' => "\xc3\xb8", #Latin small letter o with stroke = Latin small letter o slash + 'ù' => "\xc3\xb9", #Latin small letter u with grave + 'ú' => "\xc3\xba", #Latin small letter u with acute + 'û' => "\xc3\xbb", #Latin small letter u with circumflex + 'ü' => "\xc3\xbc", #Latin small letter u with diaeresis + 'ý' => "\xc3\xbd", #Latin small letter y with acute + 'þ' => "\xc3\xbe", #Latin small letter thorn + 'ÿ' => "\xc3\xbf", #Latin small letter y with diaeresis + #Symbols and Greek Letters: + 'ƒ' => "\xc6\x92", #U+0192 [ƒ] Latin small f with hook = function = florin + 'Α' => "\xce\x91", #Greek capital letter alpha + 'Β' => "\xce\x92", #Greek capital letter beta + 'Γ' => "\xce\x93", 
#Greek capital letter gamma + 'Δ' => "\xce\x94", #Greek capital letter delta + 'Ε' => "\xce\x95", #Greek capital letter epsilon + 'Ζ' => "\xce\x96", #Greek capital letter zeta + 'Η' => "\xce\x97", #Greek capital letter eta + 'Θ' => "\xce\x98", #Greek capital letter theta + 'Ι' => "\xce\x99", #Greek capital letter iota + 'Κ' => "\xce\x9a", #Greek capital letter kappa + 'Λ' => "\xce\x9b", #Greek capital letter lambda + 'Μ' => "\xce\x9c", #Greek capital letter mu + 'Ν' => "\xce\x9d", #Greek capital letter nu + 'Ξ' => "\xce\x9e", #Greek capital letter xi + 'Ο' => "\xce\x9f", #Greek capital letter omicron + 'Π' => "\xce\xa0", #Greek capital letter pi + 'Ρ' => "\xce\xa1", #Greek capital letter rho + 'Σ' => "\xce\xa3", #Greek capital letter sigma + 'Τ' => "\xce\xa4", #Greek capital letter tau + 'Υ' => "\xce\xa5", #Greek capital letter upsilon + 'Φ' => "\xce\xa6", #Greek capital letter phi + 'Χ' => "\xce\xa7", #Greek capital letter chi + 'Ψ' => "\xce\xa8", #Greek capital letter psi + 'Ω' => "\xce\xa9", #Greek capital letter omega + 'α' => "\xce\xb1", #Greek small letter alpha + 'β' => "\xce\xb2", #Greek small letter beta + 'γ' => "\xce\xb3", #Greek small letter gamma + 'δ' => "\xce\xb4", #Greek small letter delta + 'ε' => "\xce\xb5", #Greek small letter epsilon + 'ζ' => "\xce\xb6", #Greek small letter zeta + 'η' => "\xce\xb7", #Greek small letter eta + 'θ' => "\xce\xb8", #Greek small letter theta + 'ι' => "\xce\xb9", #Greek small letter iota + 'κ' => "\xce\xba", #Greek small letter kappa + 'λ' => "\xce\xbb", #Greek small letter lambda + 'μ' => "\xce\xbc", #Greek small letter mu + 'ν' => "\xce\xbd", #Greek small letter nu + 'ξ' => "\xce\xbe", #Greek small letter xi + 'ο' => "\xce\xbf", #Greek small letter omicron + 'π' => "\xcf\x80", #Greek small letter pi + 'ρ' => "\xcf\x81", #Greek small letter rho + 'ς' => "\xcf\x82", #Greek small letter final sigma + 'σ' => "\xcf\x83", #Greek small letter sigma + 'τ' => "\xcf\x84", #Greek small letter tau + 'υ' => "\xcf\x85", #Greek 
small letter upsilon + 'φ' => "\xcf\x86", #Greek small letter phi + 'χ' => "\xcf\x87", #Greek small letter chi + 'ψ' => "\xcf\x88", #Greek small letter psi + 'ω' => "\xcf\x89", #Greek small letter omega + 'ϑ'=> "\xcf\x91", #Greek small letter theta symbol + 'ϒ' => "\xcf\x92", #Greek upsilon with hook symbol + 'ϖ' => "\xcf\x96", #U+03D6 [ϖ] Greek pi symbol + + '•' => "\xe2\x80\xa2", #U+2022 [•] bullet = black small circle + '…' => "\xe2\x80\xa6", #U+2026 […] horizontal ellipsis = three dot leader + '′' => "\xe2\x80\xb2", #U+2032 [′] prime = minutes = feet (для обозначения минут и футов) + '″' => "\xe2\x80\xb3", #U+2033 [″] double prime = seconds = inches (для обозначения секунд и дюймов). + '‾' => "\xe2\x80\xbe", #U+203E [‾] overline = spacing overscore + '⁄' => "\xe2\x81\x84", #U+2044 [⁄] fraction slash + '℘' => "\xe2\x84\x98", #U+2118 [℘] script capital P = power set = Weierstrass p + 'ℑ' => "\xe2\x84\x91", #U+2111 [ℑ] blackletter capital I = imaginary part + 'ℜ' => "\xe2\x84\x9c", #U+211C [ℜ] blackletter capital R = real part symbol + '™' => "\xe2\x84\xa2", #U+2122 [™] trade mark sign + 'ℵ' => "\xe2\x84\xb5", #U+2135 [ℵ] alef symbol = first transfinite cardinal + '←' => "\xe2\x86\x90", #U+2190 [←] leftwards arrow + '↑' => "\xe2\x86\x91", #U+2191 [↑] upwards arrow + '→' => "\xe2\x86\x92", #U+2192 [→] rightwards arrow + '↓' => "\xe2\x86\x93", #U+2193 [↓] downwards arrow + '↔' => "\xe2\x86\x94", #U+2194 [↔] left right arrow + '↵' => "\xe2\x86\xb5", #U+21B5 [↵] downwards arrow with corner leftwards = carriage return + '⇐' => "\xe2\x87\x90", #U+21D0 [⇐] leftwards double arrow + '⇑' => "\xe2\x87\x91", #U+21D1 [⇑] upwards double arrow + '⇒' => "\xe2\x87\x92", #U+21D2 [⇒] rightwards double arrow + '⇓' => "\xe2\x87\x93", #U+21D3 [⇓] downwards double arrow + '⇔' => "\xe2\x87\x94", #U+21D4 [⇔] left right double arrow + '∀' => "\xe2\x88\x80", #U+2200 [∀] for all + '∂' => "\xe2\x88\x82", #U+2202 [∂] partial differential + '∃' => "\xe2\x88\x83", #U+2203 [∃] there exists + '∅' 
=> "\xe2\x88\x85", #U+2205 [∅] empty set = null set = diameter + '∇' => "\xe2\x88\x87", #U+2207 [∇] nabla = backward difference + '∈' => "\xe2\x88\x88", #U+2208 [∈] element of + '∉' => "\xe2\x88\x89", #U+2209 [∉] not an element of + '∋' => "\xe2\x88\x8b", #U+220B [∋] contains as member + '∏' => "\xe2\x88\x8f", #U+220F [∏] n-ary product = product sign + '∑' => "\xe2\x88\x91", #U+2211 [∑] n-ary sumation + '−' => "\xe2\x88\x92", #U+2212 [−] minus sign + '∗' => "\xe2\x88\x97", #U+2217 [∗] asterisk operator + '√' => "\xe2\x88\x9a", #U+221A [√] square root = radical sign + '∝' => "\xe2\x88\x9d", #U+221D [∝] proportional to + '∞' => "\xe2\x88\x9e", #U+221E [∞] infinity + '∠' => "\xe2\x88\xa0", #U+2220 [∠] angle + '∧' => "\xe2\x88\xa7", #U+2227 [∧] logical and = wedge + '∨' => "\xe2\x88\xa8", #U+2228 [∨] logical or = vee + '∩' => "\xe2\x88\xa9", #U+2229 [∩] intersection = cap + '∪' => "\xe2\x88\xaa", #U+222A [∪] union = cup + '∫' => "\xe2\x88\xab", #U+222B [∫] integral + '∴' => "\xe2\x88\xb4", #U+2234 [∴] therefore + '∼' => "\xe2\x88\xbc", #U+223C [∼] tilde operator = varies with = similar to + '≅' => "\xe2\x89\x85", #U+2245 [≅] approximately equal to + '≈' => "\xe2\x89\x88", #U+2248 [≈] almost equal to = asymptotic to + '≠' => "\xe2\x89\xa0", #U+2260 [≠] not equal to + '≡' => "\xe2\x89\xa1", #U+2261 [≡] identical to + '≤' => "\xe2\x89\xa4", #U+2264 [≤] less-than or equal to + '≥' => "\xe2\x89\xa5", #U+2265 [≥] greater-than or equal to + '⊂' => "\xe2\x8a\x82", #U+2282 [⊂] subset of + '⊃' => "\xe2\x8a\x83", #U+2283 [⊃] superset of + '⊄' => "\xe2\x8a\x84", #U+2284 [⊄] not a subset of + '⊆' => "\xe2\x8a\x86", #U+2286 [⊆] subset of or equal to + '⊇' => "\xe2\x8a\x87", #U+2287 [⊇] superset of or equal to + '⊕' => "\xe2\x8a\x95", #U+2295 [⊕] circled plus = direct sum + '⊗' => "\xe2\x8a\x97", #U+2297 [⊗] circled times = vector product + '⊥' => "\xe2\x8a\xa5", #U+22A5 [⊥] up tack = orthogonal to = perpendicular + '⋅' => "\xe2\x8b\x85", #U+22C5 [⋅] dot operator + '⌈' => 
"\xe2\x8c\x88", #U+2308 [⌈] left ceiling = APL upstile + '⌉' => "\xe2\x8c\x89", #U+2309 [⌉] right ceiling + '⌊' => "\xe2\x8c\x8a", #U+230A [⌊] left floor = APL downstile + '⌋' => "\xe2\x8c\x8b", #U+230B [⌋] right floor + '⟨' => "\xe2\x8c\xa9", #U+2329 [〈] left-pointing angle bracket = bra + '⟩' => "\xe2\x8c\xaa", #U+232A [〉] right-pointing angle bracket = ket + '◊' => "\xe2\x97\x8a", #U+25CA [◊] lozenge + '♠' => "\xe2\x99\xa0", #U+2660 [♠] black spade suit + '♣' => "\xe2\x99\xa3", #U+2663 [♣] black club suit = shamrock + '♥' => "\xe2\x99\xa5", #U+2665 [♥] black heart suit = valentine + '♦' => "\xe2\x99\xa6", #U+2666 [♦] black diamond suit + #Other Special Characters: + 'Œ' => "\xc5\x92", #U+0152 [Œ] Latin capital ligature OE + 'œ' => "\xc5\x93", #U+0153 [œ] Latin small ligature oe + 'Š' => "\xc5\xa0", #U+0160 [Š] Latin capital letter S with caron + 'š' => "\xc5\xa1", #U+0161 [š] Latin small letter s with caron + 'Ÿ' => "\xc5\xb8", #U+0178 [Ÿ] Latin capital letter Y with diaeresis + 'ˆ' => "\xcb\x86", #U+02C6 [ˆ] modifier letter circumflex accent + '˜' => "\xcb\x9c", #U+02DC [˜] small tilde + ' ' => "\xe2\x80\x82", #U+2002 [ ] en space + ' ' => "\xe2\x80\x83", #U+2003 [ ] em space + ' ' => "\xe2\x80\x89", #U+2009 [ ] thin space + '‌' => "\xe2\x80\x8c", #U+200C [‌] zero width non-joiner + '‍' => "\xe2\x80\x8d", #U+200D [‍] zero width joiner + '‎' => "\xe2\x80\x8e", #U+200E [‎] left-to-right mark + '‏' => "\xe2\x80\x8f", #U+200F [‏] right-to-left mark + '–' => "\xe2\x80\x93", #U+2013 [–] en dash + '—' => "\xe2\x80\x94", #U+2014 [—] em dash + '‘' => "\xe2\x80\x98", #U+2018 [‘] left single quotation mark + '’' => "\xe2\x80\x99", #U+2019 [’] right single quotation mark (and apostrophe!) 
+ '‚' => "\xe2\x80\x9a", #U+201A [‚] single low-9 quotation mark + '“' => "\xe2\x80\x9c", #U+201C [“] left double quotation mark + '”' => "\xe2\x80\x9d", #U+201D [”] right double quotation mark + '„' => "\xe2\x80\x9e", #U+201E [„] double low-9 quotation mark + '†' => "\xe2\x80\xa0", #U+2020 [†] dagger + '‡' => "\xe2\x80\xa1", #U+2021 [‡] double dagger + '‰' => "\xe2\x80\xb0", #U+2030 [‰] per mille sign + '‹' => "\xe2\x80\xb9", #U+2039 [‹] single left-pointing angle quotation mark + '›' => "\xe2\x80\xba", #U+203A [›] single right-pointing angle quotation mark + '€' => "\xe2\x82\xac", #U+20AC [€] euro sign + ); + + /** + * This table contains the data on how cp1259 characters map into Unicode (UTF-8). + * The cp1259 map describes standart tatarish cyrillic charset and based on the cp1251 table. + * cp1259 -- this is an outdated one byte encoding of the Tatar language, + * which includes all the Russian letters from cp1251. + * + * @link http://search.cpan.org/CPAN/authors/id/A/AM/AMICHAUER/Lingua-TT-Yanalif-0.08.tar.gz + * @link http://www.unicode.org/charts/PDF/U0400.pdf + */ + public static $cp1259_table = array( + #bytes from 0x00 to 0x7F (ASCII) saved as is + "\x80" => "\xd3\x98", #U+04d8 CYRILLIC CAPITAL LETTER SCHWA + "\x81" => "\xd0\x83", #U+0403 CYRILLIC CAPITAL LETTER GJE + "\x82" => "\xe2\x80\x9a", #U+201a SINGLE LOW-9 QUOTATION MARK + "\x83" => "\xd1\x93", #U+0453 CYRILLIC SMALL LETTER GJE + "\x84" => "\xe2\x80\x9e", #U+201e DOUBLE LOW-9 QUOTATION MARK + "\x85" => "\xe2\x80\xa6", #U+2026 HORIZONTAL ELLIPSIS + "\x86" => "\xe2\x80\xa0", #U+2020 DAGGER + "\x87" => "\xe2\x80\xa1", #U+2021 DOUBLE DAGGER + "\x88" => "\xe2\x82\xac", #U+20ac EURO SIGN + "\x89" => "\xe2\x80\xb0", #U+2030 PER MILLE SIGN + "\x8a" => "\xd3\xa8", #U+04e8 CYRILLIC CAPITAL LETTER BARRED O + "\x8b" => "\xe2\x80\xb9", #U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + "\x8c" => "\xd2\xae", #U+04ae CYRILLIC CAPITAL LETTER STRAIGHT U + "\x8d" => "\xd2\x96", #U+0496 CYRILLIC CAPITAL LETTER 
ZHE WITH DESCENDER + "\x8e" => "\xd2\xa2", #U+04a2 CYRILLIC CAPITAL LETTER EN WITH HOOK + "\x8f" => "\xd2\xba", #U+04ba CYRILLIC CAPITAL LETTER SHHA + "\x90" => "\xd3\x99", #U+04d9 CYRILLIC SMALL LETTER SCHWA + "\x91" => "\xe2\x80\x98", #U+2018 LEFT SINGLE QUOTATION MARK + "\x92" => "\xe2\x80\x99", #U+2019 RIGHT SINGLE QUOTATION MARK + "\x93" => "\xe2\x80\x9c", #U+201c LEFT DOUBLE QUOTATION MARK + "\x94" => "\xe2\x80\x9d", #U+201d RIGHT DOUBLE QUOTATION MARK + "\x95" => "\xe2\x80\xa2", #U+2022 BULLET + "\x96" => "\xe2\x80\x93", #U+2013 EN DASH + "\x97" => "\xe2\x80\x94", #U+2014 EM DASH + #"\x98" #UNDEFINED + "\x99" => "\xe2\x84\xa2", #U+2122 TRADE MARK SIGN + "\x9a" => "\xd3\xa9", #U+04e9 CYRILLIC SMALL LETTER BARRED O + "\x9b" => "\xe2\x80\xba", #U+203a SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + "\x9c" => "\xd2\xaf", #U+04af CYRILLIC SMALL LETTER STRAIGHT U + "\x9d" => "\xd2\x97", #U+0497 CYRILLIC SMALL LETTER ZHE WITH DESCENDER + "\x9e" => "\xd2\xa3", #U+04a3 CYRILLIC SMALL LETTER EN WITH HOOK + "\x9f" => "\xd2\xbb", #U+04bb CYRILLIC SMALL LETTER SHHA + "\xa0" => "\xc2\xa0", #U+00a0 NO-BREAK SPACE + "\xa1" => "\xd0\x8e", #U+040e CYRILLIC CAPITAL LETTER SHORT U + "\xa2" => "\xd1\x9e", #U+045e CYRILLIC SMALL LETTER SHORT U + "\xa3" => "\xd0\x88", #U+0408 CYRILLIC CAPITAL LETTER JE + "\xa4" => "\xc2\xa4", #U+00a4 CURRENCY SIGN + "\xa5" => "\xd2\x90", #U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN + "\xa6" => "\xc2\xa6", #U+00a6 BROKEN BAR + "\xa7" => "\xc2\xa7", #U+00a7 SECTION SIGN + "\xa8" => "\xd0\x81", #U+0401 CYRILLIC CAPITAL LETTER IO + "\xa9" => "\xc2\xa9", #U+00a9 COPYRIGHT SIGN + "\xaa" => "\xd0\x84", #U+0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE + "\xab" => "\xc2\xab", #U+00ab LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + "\xac" => "\xc2\xac", #U+00ac NOT SIGN + "\xad" => "\xc2\xad", #U+00ad SOFT HYPHEN + "\xae" => "\xc2\xae", #U+00ae REGISTERED SIGN + "\xaf" => "\xd0\x87", #U+0407 CYRILLIC CAPITAL LETTER YI + "\xb0" => "\xc2\xb0", #U+00b0 DEGREE SIGN + 
"\xb1" => "\xc2\xb1", #U+00b1 PLUS-MINUS SIGN + "\xb2" => "\xd0\x86", #U+0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + "\xb3" => "\xd1\x96", #U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + "\xb4" => "\xd2\x91", #U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN + "\xb5" => "\xc2\xb5", #U+00b5 MICRO SIGN + "\xb6" => "\xc2\xb6", #U+00b6 PILCROW SIGN + "\xb7" => "\xc2\xb7", #U+00b7 MIDDLE DOT + "\xb8" => "\xd1\x91", #U+0451 CYRILLIC SMALL LETTER IO + "\xb9" => "\xe2\x84\x96", #U+2116 NUMERO SIGN + "\xba" => "\xd1\x94", #U+0454 CYRILLIC SMALL LETTER UKRAINIAN IE + "\xbb" => "\xc2\xbb", #U+00bb RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + "\xbc" => "\xd1\x98", #U+0458 CYRILLIC SMALL LETTER JE + "\xbd" => "\xd0\x85", #U+0405 CYRILLIC CAPITAL LETTER DZE + "\xbe" => "\xd1\x95", #U+0455 CYRILLIC SMALL LETTER DZE + "\xbf" => "\xd1\x97", #U+0457 CYRILLIC SMALL LETTER YI + "\xc0" => "\xd0\x90", #U+0410 CYRILLIC CAPITAL LETTER A + "\xc1" => "\xd0\x91", #U+0411 CYRILLIC CAPITAL LETTER BE + "\xc2" => "\xd0\x92", #U+0412 CYRILLIC CAPITAL LETTER VE + "\xc3" => "\xd0\x93", #U+0413 CYRILLIC CAPITAL LETTER GHE + "\xc4" => "\xd0\x94", #U+0414 CYRILLIC CAPITAL LETTER DE + "\xc5" => "\xd0\x95", #U+0415 CYRILLIC CAPITAL LETTER IE + "\xc6" => "\xd0\x96", #U+0416 CYRILLIC CAPITAL LETTER ZHE + "\xc7" => "\xd0\x97", #U+0417 CYRILLIC CAPITAL LETTER ZE + "\xc8" => "\xd0\x98", #U+0418 CYRILLIC CAPITAL LETTER I + "\xc9" => "\xd0\x99", #U+0419 CYRILLIC CAPITAL LETTER SHORT I + "\xca" => "\xd0\x9a", #U+041a CYRILLIC CAPITAL LETTER KA + "\xcb" => "\xd0\x9b", #U+041b CYRILLIC CAPITAL LETTER EL + "\xcc" => "\xd0\x9c", #U+041c CYRILLIC CAPITAL LETTER EM + "\xcd" => "\xd0\x9d", #U+041d CYRILLIC CAPITAL LETTER EN + "\xce" => "\xd0\x9e", #U+041e CYRILLIC CAPITAL LETTER O + "\xcf" => "\xd0\x9f", #U+041f CYRILLIC CAPITAL LETTER PE + "\xd0" => "\xd0\xa0", #U+0420 CYRILLIC CAPITAL LETTER ER + "\xd1" => "\xd0\xa1", #U+0421 CYRILLIC CAPITAL LETTER ES + "\xd2" => "\xd0\xa2", #U+0422 CYRILLIC 
CAPITAL LETTER TE + "\xd3" => "\xd0\xa3", #U+0423 CYRILLIC CAPITAL LETTER U + "\xd4" => "\xd0\xa4", #U+0424 CYRILLIC CAPITAL LETTER EF + "\xd5" => "\xd0\xa5", #U+0425 CYRILLIC CAPITAL LETTER HA + "\xd6" => "\xd0\xa6", #U+0426 CYRILLIC CAPITAL LETTER TSE + "\xd7" => "\xd0\xa7", #U+0427 CYRILLIC CAPITAL LETTER CHE + "\xd8" => "\xd0\xa8", #U+0428 CYRILLIC CAPITAL LETTER SHA + "\xd9" => "\xd0\xa9", #U+0429 CYRILLIC CAPITAL LETTER SHCHA + "\xda" => "\xd0\xaa", #U+042a CYRILLIC CAPITAL LETTER HARD SIGN + "\xdb" => "\xd0\xab", #U+042b CYRILLIC CAPITAL LETTER YERU + "\xdc" => "\xd0\xac", #U+042c CYRILLIC CAPITAL LETTER SOFT SIGN + "\xdd" => "\xd0\xad", #U+042d CYRILLIC CAPITAL LETTER E + "\xde" => "\xd0\xae", #U+042e CYRILLIC CAPITAL LETTER YU + "\xdf" => "\xd0\xaf", #U+042f CYRILLIC CAPITAL LETTER YA + "\xe0" => "\xd0\xb0", #U+0430 CYRILLIC SMALL LETTER A + "\xe1" => "\xd0\xb1", #U+0431 CYRILLIC SMALL LETTER BE + "\xe2" => "\xd0\xb2", #U+0432 CYRILLIC SMALL LETTER VE + "\xe3" => "\xd0\xb3", #U+0433 CYRILLIC SMALL LETTER GHE + "\xe4" => "\xd0\xb4", #U+0434 CYRILLIC SMALL LETTER DE + "\xe5" => "\xd0\xb5", #U+0435 CYRILLIC SMALL LETTER IE + "\xe6" => "\xd0\xb6", #U+0436 CYRILLIC SMALL LETTER ZHE + "\xe7" => "\xd0\xb7", #U+0437 CYRILLIC SMALL LETTER ZE + "\xe8" => "\xd0\xb8", #U+0438 CYRILLIC SMALL LETTER I + "\xe9" => "\xd0\xb9", #U+0439 CYRILLIC SMALL LETTER SHORT I + "\xea" => "\xd0\xba", #U+043a CYRILLIC SMALL LETTER KA + "\xeb" => "\xd0\xbb", #U+043b CYRILLIC SMALL LETTER EL + "\xec" => "\xd0\xbc", #U+043c CYRILLIC SMALL LETTER EM + "\xed" => "\xd0\xbd", #U+043d CYRILLIC SMALL LETTER EN + "\xee" => "\xd0\xbe", #U+043e CYRILLIC SMALL LETTER O + "\xef" => "\xd0\xbf", #U+043f CYRILLIC SMALL LETTER PE + "\xf0" => "\xd1\x80", #U+0440 CYRILLIC SMALL LETTER ER + "\xf1" => "\xd1\x81", #U+0441 CYRILLIC SMALL LETTER ES + "\xf2" => "\xd1\x82", #U+0442 CYRILLIC SMALL LETTER TE + "\xf3" => "\xd1\x83", #U+0443 CYRILLIC SMALL LETTER U + "\xf4" => "\xd1\x84", #U+0444 CYRILLIC SMALL 
LETTER EF + "\xf5" => "\xd1\x85", #U+0445 CYRILLIC SMALL LETTER HA + "\xf6" => "\xd1\x86", #U+0446 CYRILLIC SMALL LETTER TSE + "\xf7" => "\xd1\x87", #U+0447 CYRILLIC SMALL LETTER CHE + "\xf8" => "\xd1\x88", #U+0448 CYRILLIC SMALL LETTER SHA + "\xf9" => "\xd1\x89", #U+0449 CYRILLIC SMALL LETTER SHCHA + "\xfa" => "\xd1\x8a", #U+044a CYRILLIC SMALL LETTER HARD SIGN + "\xfb" => "\xd1\x8b", #U+044b CYRILLIC SMALL LETTER YERU + "\xfc" => "\xd1\x8c", #U+044c CYRILLIC SMALL LETTER SOFT SIGN + "\xfd" => "\xd1\x8d", #U+044d CYRILLIC SMALL LETTER E + "\xfe" => "\xd1\x8e", #U+044e CYRILLIC SMALL LETTER YU + "\xff" => "\xd1\x8f", #U+044f CYRILLIC SMALL LETTER YA + ); + + /** + * UTF-8 Case lookup table + * + * This lookuptable defines the upper case letters to their correspponding + * lower case letter in UTF-8 + * + * @author Andreas Gohr + */ + public static $convert_case_table = array( + #CASE_UPPER => case_lower + "\x41" => "\x61", #A a + "\x42" => "\x62", #B b + "\x43" => "\x63", #C c + "\x44" => "\x64", #D d + "\x45" => "\x65", #E e + "\x46" => "\x66", #F f + "\x47" => "\x67", #G g + "\x48" => "\x68", #H h + "\x49" => "\x69", #I i + "\x4a" => "\x6a", #J j + "\x4b" => "\x6b", #K k + "\x4c" => "\x6c", #L l + "\x4d" => "\x6d", #M m + "\x4e" => "\x6e", #N n + "\x4f" => "\x6f", #O o + "\x50" => "\x70", #P p + "\x51" => "\x71", #Q q + "\x52" => "\x72", #R r + "\x53" => "\x73", #S s + "\x54" => "\x74", #T t + "\x55" => "\x75", #U u + "\x56" => "\x76", #V v + "\x57" => "\x77", #W w + "\x58" => "\x78", #X x + "\x59" => "\x79", #Y y + "\x5a" => "\x7a", #Z z + "\xc3\x80" => "\xc3\xa0", + "\xc3\x81" => "\xc3\xa1", + "\xc3\x82" => "\xc3\xa2", + "\xc3\x83" => "\xc3\xa3", + "\xc3\x84" => "\xc3\xa4", + "\xc3\x85" => "\xc3\xa5", + "\xc3\x86" => "\xc3\xa6", + "\xc3\x87" => "\xc3\xa7", + "\xc3\x88" => "\xc3\xa8", + "\xc3\x89" => "\xc3\xa9", + "\xc3\x8a" => "\xc3\xaa", + "\xc3\x8b" => "\xc3\xab", + "\xc3\x8c" => "\xc3\xac", + "\xc3\x8d" => "\xc3\xad", + "\xc3\x8e" => "\xc3\xae", + "\xc3\x8f" 
=> "\xc3\xaf", + "\xc3\x90" => "\xc3\xb0", + "\xc3\x91" => "\xc3\xb1", + "\xc3\x92" => "\xc3\xb2", + "\xc3\x93" => "\xc3\xb3", + "\xc3\x94" => "\xc3\xb4", + "\xc3\x95" => "\xc3\xb5", + "\xc3\x96" => "\xc3\xb6", + "\xc3\x98" => "\xc3\xb8", + "\xc3\x99" => "\xc3\xb9", + "\xc3\x9a" => "\xc3\xba", + "\xc3\x9b" => "\xc3\xbb", + "\xc3\x9c" => "\xc3\xbc", + "\xc3\x9d" => "\xc3\xbd", + "\xc3\x9e" => "\xc3\xbe", + "\xc4\x80" => "\xc4\x81", + "\xc4\x82" => "\xc4\x83", + "\xc4\x84" => "\xc4\x85", + "\xc4\x86" => "\xc4\x87", + "\xc4\x88" => "\xc4\x89", + "\xc4\x8a" => "\xc4\x8b", + "\xc4\x8c" => "\xc4\x8d", + "\xc4\x8e" => "\xc4\x8f", + "\xc4\x90" => "\xc4\x91", + "\xc4\x92" => "\xc4\x93", + "\xc4\x94" => "\xc4\x95", + "\xc4\x96" => "\xc4\x97", + "\xc4\x98" => "\xc4\x99", + "\xc4\x9a" => "\xc4\x9b", + "\xc4\x9c" => "\xc4\x9d", + "\xc4\x9e" => "\xc4\x9f", + "\xc4\xa0" => "\xc4\xa1", + "\xc4\xa2" => "\xc4\xa3", + "\xc4\xa4" => "\xc4\xa5", + "\xc4\xa6" => "\xc4\xa7", + "\xc4\xa8" => "\xc4\xa9", + "\xc4\xaa" => "\xc4\xab", + "\xc4\xac" => "\xc4\xad", + "\xc4\xae" => "\xc4\xaf", + "\xc4\xb2" => "\xc4\xb3", + "\xc4\xb4" => "\xc4\xb5", + "\xc4\xb6" => "\xc4\xb7", + "\xc4\xb9" => "\xc4\xba", + "\xc4\xbb" => "\xc4\xbc", + "\xc4\xbd" => "\xc4\xbe", + "\xc4\xbf" => "\xc5\x80", + "\xc5\x81" => "\xc5\x82", + "\xc5\x83" => "\xc5\x84", + "\xc5\x85" => "\xc5\x86", + "\xc5\x87" => "\xc5\x88", + "\xc5\x8a" => "\xc5\x8b", + "\xc5\x8c" => "\xc5\x8d", + "\xc5\x8e" => "\xc5\x8f", + "\xc5\x90" => "\xc5\x91", + "\xc5\x92" => "\xc5\x93", + "\xc5\x94" => "\xc5\x95", + "\xc5\x96" => "\xc5\x97", + "\xc5\x98" => "\xc5\x99", + "\xc5\x9a" => "\xc5\x9b", + "\xc5\x9c" => "\xc5\x9d", + "\xc5\x9e" => "\xc5\x9f", + "\xc5\xa0" => "\xc5\xa1", + "\xc5\xa2" => "\xc5\xa3", + "\xc5\xa4" => "\xc5\xa5", + "\xc5\xa6" => "\xc5\xa7", + "\xc5\xa8" => "\xc5\xa9", + "\xc5\xaa" => "\xc5\xab", + "\xc5\xac" => "\xc5\xad", + "\xc5\xae" => "\xc5\xaf", + "\xc5\xb0" => "\xc5\xb1", + "\xc5\xb2" => "\xc5\xb3", + "\xc5\xb4" => 
"\xc5\xb5", + "\xc5\xb6" => "\xc5\xb7", + "\xc5\xb8" => "\xc3\xbf", + "\xc5\xb9" => "\xc5\xba", + "\xc5\xbb" => "\xc5\xbc", + "\xc5\xbd" => "\xc5\xbe", + "\xc6\x81" => "\xc9\x93", + "\xc6\x82" => "\xc6\x83", + "\xc6\x84" => "\xc6\x85", + "\xc6\x86" => "\xc9\x94", + "\xc6\x87" => "\xc6\x88", + "\xc6\x89" => "\xc9\x96", + "\xc6\x8a" => "\xc9\x97", + "\xc6\x8b" => "\xc6\x8c", + "\xc6\x8e" => "\xc7\x9d", + "\xc6\x8f" => "\xc9\x99", + "\xc6\x90" => "\xc9\x9b", + "\xc6\x91" => "\xc6\x92", + "\xc6\x94" => "\xc9\xa3", + "\xc6\x96" => "\xc9\xa9", + "\xc6\x97" => "\xc9\xa8", + "\xc6\x98" => "\xc6\x99", + "\xc6\x9c" => "\xc9\xaf", + "\xc6\x9d" => "\xc9\xb2", + "\xc6\x9f" => "\xc9\xb5", + "\xc6\xa0" => "\xc6\xa1", + "\xc6\xa2" => "\xc6\xa3", + "\xc6\xa4" => "\xc6\xa5", + "\xc6\xa6" => "\xca\x80", + "\xc6\xa7" => "\xc6\xa8", + "\xc6\xa9" => "\xca\x83", + "\xc6\xac" => "\xc6\xad", + "\xc6\xae" => "\xca\x88", + "\xc6\xaf" => "\xc6\xb0", + "\xc6\xb1" => "\xca\x8a", + "\xc6\xb2" => "\xca\x8b", + "\xc6\xb3" => "\xc6\xb4", + "\xc6\xb5" => "\xc6\xb6", + "\xc6\xb7" => "\xca\x92", + "\xc6\xb8" => "\xc6\xb9", + "\xc6\xbc" => "\xc6\xbd", + "\xc7\x85" => "\xc7\x86", + "\xc7\x88" => "\xc7\x89", + "\xc7\x8b" => "\xc7\x8c", + "\xc7\x8d" => "\xc7\x8e", + "\xc7\x8f" => "\xc7\x90", + "\xc7\x91" => "\xc7\x92", + "\xc7\x93" => "\xc7\x94", + "\xc7\x95" => "\xc7\x96", + "\xc7\x97" => "\xc7\x98", + "\xc7\x99" => "\xc7\x9a", + "\xc7\x9b" => "\xc7\x9c", + "\xc7\x9e" => "\xc7\x9f", + "\xc7\xa0" => "\xc7\xa1", + "\xc7\xa2" => "\xc7\xa3", + "\xc7\xa4" => "\xc7\xa5", + "\xc7\xa6" => "\xc7\xa7", + "\xc7\xa8" => "\xc7\xa9", + "\xc7\xaa" => "\xc7\xab", + "\xc7\xac" => "\xc7\xad", + "\xc7\xae" => "\xc7\xaf", + "\xc7\xb2" => "\xc7\xb3", + "\xc7\xb4" => "\xc7\xb5", + "\xc7\xb6" => "\xc6\x95", + "\xc7\xb7" => "\xc6\xbf", + "\xc7\xb8" => "\xc7\xb9", + "\xc7\xba" => "\xc7\xbb", + "\xc7\xbc" => "\xc7\xbd", + "\xc7\xbe" => "\xc7\xbf", + "\xc8\x80" => "\xc8\x81", + "\xc8\x82" => "\xc8\x83", + "\xc8\x84" => "\xc8\x85", 
+ "\xc8\x86" => "\xc8\x87", + "\xc8\x88" => "\xc8\x89", + "\xc8\x8a" => "\xc8\x8b", + "\xc8\x8c" => "\xc8\x8d", + "\xc8\x8e" => "\xc8\x8f", + "\xc8\x90" => "\xc8\x91", + "\xc8\x92" => "\xc8\x93", + "\xc8\x94" => "\xc8\x95", + "\xc8\x96" => "\xc8\x97", + "\xc8\x98" => "\xc8\x99", + "\xc8\x9a" => "\xc8\x9b", + "\xc8\x9c" => "\xc8\x9d", + "\xc8\x9e" => "\xc8\x9f", + "\xc8\xa0" => "\xc6\x9e", + "\xc8\xa2" => "\xc8\xa3", + "\xc8\xa4" => "\xc8\xa5", + "\xc8\xa6" => "\xc8\xa7", + "\xc8\xa8" => "\xc8\xa9", + "\xc8\xaa" => "\xc8\xab", + "\xc8\xac" => "\xc8\xad", + "\xc8\xae" => "\xc8\xaf", + "\xc8\xb0" => "\xc8\xb1", + "\xc8\xb2" => "\xc8\xb3", + "\xce\x86" => "\xce\xac", + "\xce\x88" => "\xce\xad", + "\xce\x89" => "\xce\xae", + "\xce\x8a" => "\xce\xaf", + "\xce\x8c" => "\xcf\x8c", + "\xce\x8e" => "\xcf\x8d", + "\xce\x8f" => "\xcf\x8e", + "\xce\x91" => "\xce\xb1", + "\xce\x92" => "\xce\xb2", + "\xce\x93" => "\xce\xb3", + "\xce\x94" => "\xce\xb4", + "\xce\x95" => "\xce\xb5", + "\xce\x96" => "\xce\xb6", + "\xce\x97" => "\xce\xb7", + "\xce\x98" => "\xce\xb8", + "\xce\x99" => "\xce\xb9", + "\xce\x9a" => "\xce\xba", + "\xce\x9b" => "\xce\xbb", + "\xce\x9c" => "\xc2\xb5", + "\xce\x9d" => "\xce\xbd", + "\xce\x9e" => "\xce\xbe", + "\xce\x9f" => "\xce\xbf", + "\xce\xa0" => "\xcf\x80", + "\xce\xa1" => "\xcf\x81", + "\xce\xa3" => "\xcf\x82", + "\xce\xa4" => "\xcf\x84", + "\xce\xa5" => "\xcf\x85", + "\xce\xa6" => "\xcf\x86", + "\xce\xa7" => "\xcf\x87", + "\xce\xa8" => "\xcf\x88", + "\xce\xa9" => "\xcf\x89", + "\xce\xaa" => "\xcf\x8a", + "\xce\xab" => "\xcf\x8b", + "\xcf\x98" => "\xcf\x99", + "\xcf\x9a" => "\xcf\x9b", + "\xcf\x9c" => "\xcf\x9d", + "\xcf\x9e" => "\xcf\x9f", + "\xcf\xa0" => "\xcf\xa1", + "\xcf\xa2" => "\xcf\xa3", + "\xcf\xa4" => "\xcf\xa5", + "\xcf\xa6" => "\xcf\xa7", + "\xcf\xa8" => "\xcf\xa9", + "\xcf\xaa" => "\xcf\xab", + "\xcf\xac" => "\xcf\xad", + "\xcf\xae" => "\xcf\xaf", + "\xd0\x80" => "\xd1\x90", + "\xd0\x81" => "\xd1\x91", + "\xd0\x82" => "\xd1\x92", + 
"\xd0\x83" => "\xd1\x93", + "\xd0\x84" => "\xd1\x94", + "\xd0\x85" => "\xd1\x95", + "\xd0\x86" => "\xd1\x96", + "\xd0\x87" => "\xd1\x97", + "\xd0\x88" => "\xd1\x98", + "\xd0\x89" => "\xd1\x99", + "\xd0\x8a" => "\xd1\x9a", + "\xd0\x8b" => "\xd1\x9b", + "\xd0\x8c" => "\xd1\x9c", + "\xd0\x8d" => "\xd1\x9d", + "\xd0\x8e" => "\xd1\x9e", + "\xd0\x8f" => "\xd1\x9f", + "\xd0\x90" => "\xd0\xb0", + "\xd0\x91" => "\xd0\xb1", + "\xd0\x92" => "\xd0\xb2", + "\xd0\x93" => "\xd0\xb3", + "\xd0\x94" => "\xd0\xb4", + "\xd0\x95" => "\xd0\xb5", + "\xd0\x96" => "\xd0\xb6", + "\xd0\x97" => "\xd0\xb7", + "\xd0\x98" => "\xd0\xb8", + "\xd0\x99" => "\xd0\xb9", + "\xd0\x9a" => "\xd0\xba", + "\xd0\x9b" => "\xd0\xbb", + "\xd0\x9c" => "\xd0\xbc", + "\xd0\x9d" => "\xd0\xbd", + "\xd0\x9e" => "\xd0\xbe", + "\xd0\x9f" => "\xd0\xbf", + "\xd0\xa0" => "\xd1\x80", + "\xd0\xa1" => "\xd1\x81", + "\xd0\xa2" => "\xd1\x82", + "\xd0\xa3" => "\xd1\x83", + "\xd0\xa4" => "\xd1\x84", + "\xd0\xa5" => "\xd1\x85", + "\xd0\xa6" => "\xd1\x86", + "\xd0\xa7" => "\xd1\x87", + "\xd0\xa8" => "\xd1\x88", + "\xd0\xa9" => "\xd1\x89", + "\xd0\xaa" => "\xd1\x8a", + "\xd0\xab" => "\xd1\x8b", + "\xd0\xac" => "\xd1\x8c", + "\xd0\xad" => "\xd1\x8d", + "\xd0\xae" => "\xd1\x8e", + "\xd0\xaf" => "\xd1\x8f", + "\xd1\xa0" => "\xd1\xa1", + "\xd1\xa2" => "\xd1\xa3", + "\xd1\xa4" => "\xd1\xa5", + "\xd1\xa6" => "\xd1\xa7", + "\xd1\xa8" => "\xd1\xa9", + "\xd1\xaa" => "\xd1\xab", + "\xd1\xac" => "\xd1\xad", + "\xd1\xae" => "\xd1\xaf", + "\xd1\xb0" => "\xd1\xb1", + "\xd1\xb2" => "\xd1\xb3", + "\xd1\xb4" => "\xd1\xb5", + "\xd1\xb6" => "\xd1\xb7", + "\xd1\xb8" => "\xd1\xb9", + "\xd1\xba" => "\xd1\xbb", + "\xd1\xbc" => "\xd1\xbd", + "\xd1\xbe" => "\xd1\xbf", + "\xd2\x80" => "\xd2\x81", + "\xd2\x8a" => "\xd2\x8b", + "\xd2\x8c" => "\xd2\x8d", + "\xd2\x8e" => "\xd2\x8f", + "\xd2\x90" => "\xd2\x91", + "\xd2\x92" => "\xd2\x93", + "\xd2\x94" => "\xd2\x95", + "\xd2\x96" => "\xd2\x97", + "\xd2\x98" => "\xd2\x99", + "\xd2\x9a" => "\xd2\x9b", + "\xd2\x9c" 
=> "\xd2\x9d", + "\xd2\x9e" => "\xd2\x9f", + "\xd2\xa0" => "\xd2\xa1", + "\xd2\xa2" => "\xd2\xa3", + "\xd2\xa4" => "\xd2\xa5", + "\xd2\xa6" => "\xd2\xa7", + "\xd2\xa8" => "\xd2\xa9", + "\xd2\xaa" => "\xd2\xab", + "\xd2\xac" => "\xd2\xad", + "\xd2\xae" => "\xd2\xaf", + "\xd2\xb0" => "\xd2\xb1", + "\xd2\xb2" => "\xd2\xb3", + "\xd2\xb4" => "\xd2\xb5", + "\xd2\xb6" => "\xd2\xb7", + "\xd2\xb8" => "\xd2\xb9", + "\xd2\xba" => "\xd2\xbb", + "\xd2\xbc" => "\xd2\xbd", + "\xd2\xbe" => "\xd2\xbf", + "\xd3\x81" => "\xd3\x82", + "\xd3\x83" => "\xd3\x84", + "\xd3\x85" => "\xd3\x86", + "\xd3\x87" => "\xd3\x88", + "\xd3\x89" => "\xd3\x8a", + "\xd3\x8b" => "\xd3\x8c", + "\xd3\x8d" => "\xd3\x8e", + "\xd3\x90" => "\xd3\x91", + "\xd3\x92" => "\xd3\x93", + "\xd3\x94" => "\xd3\x95", + "\xd3\x96" => "\xd3\x97", + "\xd3\x98" => "\xd3\x99", + "\xd3\x9a" => "\xd3\x9b", + "\xd3\x9c" => "\xd3\x9d", + "\xd3\x9e" => "\xd3\x9f", + "\xd3\xa0" => "\xd3\xa1", + "\xd3\xa2" => "\xd3\xa3", + "\xd3\xa4" => "\xd3\xa5", + "\xd3\xa6" => "\xd3\xa7", + "\xd3\xa8" => "\xd3\xa9", + "\xd3\xaa" => "\xd3\xab", + "\xd3\xac" => "\xd3\xad", + "\xd3\xae" => "\xd3\xaf", + "\xd3\xb0" => "\xd3\xb1", + "\xd3\xb2" => "\xd3\xb3", + "\xd3\xb4" => "\xd3\xb5", + "\xd3\xb8" => "\xd3\xb9", + "\xd4\x80" => "\xd4\x81", + "\xd4\x82" => "\xd4\x83", + "\xd4\x84" => "\xd4\x85", + "\xd4\x86" => "\xd4\x87", + "\xd4\x88" => "\xd4\x89", + "\xd4\x8a" => "\xd4\x8b", + "\xd4\x8c" => "\xd4\x8d", + "\xd4\x8e" => "\xd4\x8f", + "\xd4\xb1" => "\xd5\xa1", + "\xd4\xb2" => "\xd5\xa2", + "\xd4\xb3" => "\xd5\xa3", + "\xd4\xb4" => "\xd5\xa4", + "\xd4\xb5" => "\xd5\xa5", + "\xd4\xb6" => "\xd5\xa6", + "\xd4\xb7" => "\xd5\xa7", + "\xd4\xb8" => "\xd5\xa8", + "\xd4\xb9" => "\xd5\xa9", + "\xd4\xba" => "\xd5\xaa", + "\xd4\xbb" => "\xd5\xab", + "\xd4\xbc" => "\xd5\xac", + "\xd4\xbd" => "\xd5\xad", + "\xd4\xbe" => "\xd5\xae", + "\xd4\xbf" => "\xd5\xaf", + "\xd5\x80" => "\xd5\xb0", + "\xd5\x81" => "\xd5\xb1", + "\xd5\x82" => "\xd5\xb2", + "\xd5\x83" => 
"\xd5\xb3", + "\xd5\x84" => "\xd5\xb4", + "\xd5\x85" => "\xd5\xb5", + "\xd5\x86" => "\xd5\xb6", + "\xd5\x87" => "\xd5\xb7", + "\xd5\x88" => "\xd5\xb8", + "\xd5\x89" => "\xd5\xb9", + "\xd5\x8a" => "\xd5\xba", + "\xd5\x8b" => "\xd5\xbb", + "\xd5\x8c" => "\xd5\xbc", + "\xd5\x8d" => "\xd5\xbd", + "\xd5\x8e" => "\xd5\xbe", + "\xd5\x8f" => "\xd5\xbf", + "\xd5\x90" => "\xd6\x80", + "\xd5\x91" => "\xd6\x81", + "\xd5\x92" => "\xd6\x82", + "\xd5\x93" => "\xd6\x83", + "\xd5\x94" => "\xd6\x84", + "\xd5\x95" => "\xd6\x85", + "\xd5\x96" => "\xd6\x86", + "\xe1\xb8\x80" => "\xe1\xb8\x81", + "\xe1\xb8\x82" => "\xe1\xb8\x83", + "\xe1\xb8\x84" => "\xe1\xb8\x85", + "\xe1\xb8\x86" => "\xe1\xb8\x87", + "\xe1\xb8\x88" => "\xe1\xb8\x89", + "\xe1\xb8\x8a" => "\xe1\xb8\x8b", + "\xe1\xb8\x8c" => "\xe1\xb8\x8d", + "\xe1\xb8\x8e" => "\xe1\xb8\x8f", + "\xe1\xb8\x90" => "\xe1\xb8\x91", + "\xe1\xb8\x92" => "\xe1\xb8\x93", + "\xe1\xb8\x94" => "\xe1\xb8\x95", + "\xe1\xb8\x96" => "\xe1\xb8\x97", + "\xe1\xb8\x98" => "\xe1\xb8\x99", + "\xe1\xb8\x9a" => "\xe1\xb8\x9b", + "\xe1\xb8\x9c" => "\xe1\xb8\x9d", + "\xe1\xb8\x9e" => "\xe1\xb8\x9f", + "\xe1\xb8\xa0" => "\xe1\xb8\xa1", + "\xe1\xb8\xa2" => "\xe1\xb8\xa3", + "\xe1\xb8\xa4" => "\xe1\xb8\xa5", + "\xe1\xb8\xa6" => "\xe1\xb8\xa7", + "\xe1\xb8\xa8" => "\xe1\xb8\xa9", + "\xe1\xb8\xaa" => "\xe1\xb8\xab", + "\xe1\xb8\xac" => "\xe1\xb8\xad", + "\xe1\xb8\xae" => "\xe1\xb8\xaf", + "\xe1\xb8\xb0" => "\xe1\xb8\xb1", + "\xe1\xb8\xb2" => "\xe1\xb8\xb3", + "\xe1\xb8\xb4" => "\xe1\xb8\xb5", + "\xe1\xb8\xb6" => "\xe1\xb8\xb7", + "\xe1\xb8\xb8" => "\xe1\xb8\xb9", + "\xe1\xb8\xba" => "\xe1\xb8\xbb", + "\xe1\xb8\xbc" => "\xe1\xb8\xbd", + "\xe1\xb8\xbe" => "\xe1\xb8\xbf", + "\xe1\xb9\x80" => "\xe1\xb9\x81", + "\xe1\xb9\x82" => "\xe1\xb9\x83", + "\xe1\xb9\x84" => "\xe1\xb9\x85", + "\xe1\xb9\x86" => "\xe1\xb9\x87", + "\xe1\xb9\x88" => "\xe1\xb9\x89", + "\xe1\xb9\x8a" => "\xe1\xb9\x8b", + "\xe1\xb9\x8c" => "\xe1\xb9\x8d", + "\xe1\xb9\x8e" => "\xe1\xb9\x8f", + 
"\xe1\xb9\x90" => "\xe1\xb9\x91", + "\xe1\xb9\x92" => "\xe1\xb9\x93", + "\xe1\xb9\x94" => "\xe1\xb9\x95", + "\xe1\xb9\x96" => "\xe1\xb9\x97", + "\xe1\xb9\x98" => "\xe1\xb9\x99", + "\xe1\xb9\x9a" => "\xe1\xb9\x9b", + "\xe1\xb9\x9c" => "\xe1\xb9\x9d", + "\xe1\xb9\x9e" => "\xe1\xb9\x9f", + "\xe1\xb9\xa0" => "\xe1\xb9\xa1", + "\xe1\xb9\xa2" => "\xe1\xb9\xa3", + "\xe1\xb9\xa4" => "\xe1\xb9\xa5", + "\xe1\xb9\xa6" => "\xe1\xb9\xa7", + "\xe1\xb9\xa8" => "\xe1\xb9\xa9", + "\xe1\xb9\xaa" => "\xe1\xb9\xab", + "\xe1\xb9\xac" => "\xe1\xb9\xad", + "\xe1\xb9\xae" => "\xe1\xb9\xaf", + "\xe1\xb9\xb0" => "\xe1\xb9\xb1", + "\xe1\xb9\xb2" => "\xe1\xb9\xb3", + "\xe1\xb9\xb4" => "\xe1\xb9\xb5", + "\xe1\xb9\xb6" => "\xe1\xb9\xb7", + "\xe1\xb9\xb8" => "\xe1\xb9\xb9", + "\xe1\xb9\xba" => "\xe1\xb9\xbb", + "\xe1\xb9\xbc" => "\xe1\xb9\xbd", + "\xe1\xb9\xbe" => "\xe1\xb9\xbf", + "\xe1\xba\x80" => "\xe1\xba\x81", + "\xe1\xba\x82" => "\xe1\xba\x83", + "\xe1\xba\x84" => "\xe1\xba\x85", + "\xe1\xba\x86" => "\xe1\xba\x87", + "\xe1\xba\x88" => "\xe1\xba\x89", + "\xe1\xba\x8a" => "\xe1\xba\x8b", + "\xe1\xba\x8c" => "\xe1\xba\x8d", + "\xe1\xba\x8e" => "\xe1\xba\x8f", + "\xe1\xba\x90" => "\xe1\xba\x91", + "\xe1\xba\x92" => "\xe1\xba\x93", + "\xe1\xba\x94" => "\xe1\xba\x95", + "\xe1\xba\xa0" => "\xe1\xba\xa1", + "\xe1\xba\xa2" => "\xe1\xba\xa3", + "\xe1\xba\xa4" => "\xe1\xba\xa5", + "\xe1\xba\xa6" => "\xe1\xba\xa7", + "\xe1\xba\xa8" => "\xe1\xba\xa9", + "\xe1\xba\xaa" => "\xe1\xba\xab", + "\xe1\xba\xac" => "\xe1\xba\xad", + "\xe1\xba\xae" => "\xe1\xba\xaf", + "\xe1\xba\xb0" => "\xe1\xba\xb1", + "\xe1\xba\xb2" => "\xe1\xba\xb3", + "\xe1\xba\xb4" => "\xe1\xba\xb5", + "\xe1\xba\xb6" => "\xe1\xba\xb7", + "\xe1\xba\xb8" => "\xe1\xba\xb9", + "\xe1\xba\xba" => "\xe1\xba\xbb", + "\xe1\xba\xbc" => "\xe1\xba\xbd", + "\xe1\xba\xbe" => "\xe1\xba\xbf", + "\xe1\xbb\x80" => "\xe1\xbb\x81", + "\xe1\xbb\x82" => "\xe1\xbb\x83", + "\xe1\xbb\x84" => "\xe1\xbb\x85", + "\xe1\xbb\x86" => "\xe1\xbb\x87", + "\xe1\xbb\x88" => 
"\xe1\xbb\x89", + "\xe1\xbb\x8a" => "\xe1\xbb\x8b", + "\xe1\xbb\x8c" => "\xe1\xbb\x8d", + "\xe1\xbb\x8e" => "\xe1\xbb\x8f", + "\xe1\xbb\x90" => "\xe1\xbb\x91", + "\xe1\xbb\x92" => "\xe1\xbb\x93", + "\xe1\xbb\x94" => "\xe1\xbb\x95", + "\xe1\xbb\x96" => "\xe1\xbb\x97", + "\xe1\xbb\x98" => "\xe1\xbb\x99", + "\xe1\xbb\x9a" => "\xe1\xbb\x9b", + "\xe1\xbb\x9c" => "\xe1\xbb\x9d", + "\xe1\xbb\x9e" => "\xe1\xbb\x9f", + "\xe1\xbb\xa0" => "\xe1\xbb\xa1", + "\xe1\xbb\xa2" => "\xe1\xbb\xa3", + "\xe1\xbb\xa4" => "\xe1\xbb\xa5", + "\xe1\xbb\xa6" => "\xe1\xbb\xa7", + "\xe1\xbb\xa8" => "\xe1\xbb\xa9", + "\xe1\xbb\xaa" => "\xe1\xbb\xab", + "\xe1\xbb\xac" => "\xe1\xbb\xad", + "\xe1\xbb\xae" => "\xe1\xbb\xaf", + "\xe1\xbb\xb0" => "\xe1\xbb\xb1", + "\xe1\xbb\xb2" => "\xe1\xbb\xb3", + "\xe1\xbb\xb4" => "\xe1\xbb\xb5", + "\xe1\xbb\xb6" => "\xe1\xbb\xb7", + "\xe1\xbb\xb8" => "\xe1\xbb\xb9", + "\xe1\xbc\x88" => "\xe1\xbc\x80", + "\xe1\xbc\x89" => "\xe1\xbc\x81", + "\xe1\xbc\x8a" => "\xe1\xbc\x82", + "\xe1\xbc\x8b" => "\xe1\xbc\x83", + "\xe1\xbc\x8c" => "\xe1\xbc\x84", + "\xe1\xbc\x8d" => "\xe1\xbc\x85", + "\xe1\xbc\x8e" => "\xe1\xbc\x86", + "\xe1\xbc\x8f" => "\xe1\xbc\x87", + "\xe1\xbc\x98" => "\xe1\xbc\x90", + "\xe1\xbc\x99" => "\xe1\xbc\x91", + "\xe1\xbc\x9a" => "\xe1\xbc\x92", + "\xe1\xbc\x9b" => "\xe1\xbc\x93", + "\xe1\xbc\x9c" => "\xe1\xbc\x94", + "\xe1\xbc\x9d" => "\xe1\xbc\x95", + "\xe1\xbc\xa9" => "\xe1\xbc\xa1", + "\xe1\xbc\xaa" => "\xe1\xbc\xa2", + "\xe1\xbc\xab" => "\xe1\xbc\xa3", + "\xe1\xbc\xac" => "\xe1\xbc\xa4", + "\xe1\xbc\xad" => "\xe1\xbc\xa5", + "\xe1\xbc\xae" => "\xe1\xbc\xa6", + "\xe1\xbc\xaf" => "\xe1\xbc\xa7", + "\xe1\xbc\xb8" => "\xe1\xbc\xb0", + "\xe1\xbc\xb9" => "\xe1\xbc\xb1", + "\xe1\xbc\xba" => "\xe1\xbc\xb2", + "\xe1\xbc\xbb" => "\xe1\xbc\xb3", + "\xe1\xbc\xbc" => "\xe1\xbc\xb4", + "\xe1\xbc\xbd" => "\xe1\xbc\xb5", + "\xe1\xbc\xbe" => "\xe1\xbc\xb6", + "\xe1\xbc\xbf" => "\xe1\xbc\xb7", + "\xe1\xbd\x88" => "\xe1\xbd\x80", + "\xe1\xbd\x89" => "\xe1\xbd\x81", + 
"\xe1\xbd\x8a" => "\xe1\xbd\x82", + "\xe1\xbd\x8b" => "\xe1\xbd\x83", + "\xe1\xbd\x8c" => "\xe1\xbd\x84", + "\xe1\xbd\x8d" => "\xe1\xbd\x85", + "\xe1\xbd\x99" => "\xe1\xbd\x91", + "\xe1\xbd\x9b" => "\xe1\xbd\x93", + "\xe1\xbd\x9d" => "\xe1\xbd\x95", + "\xe1\xbd\x9f" => "\xe1\xbd\x97", + "\xe1\xbd\xa9" => "\xe1\xbd\xa1", + "\xe1\xbd\xaa" => "\xe1\xbd\xa2", + "\xe1\xbd\xab" => "\xe1\xbd\xa3", + "\xe1\xbd\xac" => "\xe1\xbd\xa4", + "\xe1\xbd\xad" => "\xe1\xbd\xa5", + "\xe1\xbd\xae" => "\xe1\xbd\xa6", + "\xe1\xbd\xaf" => "\xe1\xbd\xa7", + "\xe1\xbe\x88" => "\xe1\xbe\x80", + "\xe1\xbe\x89" => "\xe1\xbe\x81", + "\xe1\xbe\x8a" => "\xe1\xbe\x82", + "\xe1\xbe\x8b" => "\xe1\xbe\x83", + "\xe1\xbe\x8c" => "\xe1\xbe\x84", + "\xe1\xbe\x8d" => "\xe1\xbe\x85", + "\xe1\xbe\x8e" => "\xe1\xbe\x86", + "\xe1\xbe\x8f" => "\xe1\xbe\x87", + "\xe1\xbe\x98" => "\xe1\xbe\x90", + "\xe1\xbe\x99" => "\xe1\xbe\x91", + "\xe1\xbe\x9a" => "\xe1\xbe\x92", + "\xe1\xbe\x9b" => "\xe1\xbe\x93", + "\xe1\xbe\x9c" => "\xe1\xbe\x94", + "\xe1\xbe\x9d" => "\xe1\xbe\x95", + "\xe1\xbe\x9e" => "\xe1\xbe\x96", + "\xe1\xbe\x9f" => "\xe1\xbe\x97", + "\xe1\xbe\xa9" => "\xe1\xbe\xa1", + "\xe1\xbe\xaa" => "\xe1\xbe\xa2", + "\xe1\xbe\xab" => "\xe1\xbe\xa3", + "\xe1\xbe\xac" => "\xe1\xbe\xa4", + "\xe1\xbe\xad" => "\xe1\xbe\xa5", + "\xe1\xbe\xae" => "\xe1\xbe\xa6", + "\xe1\xbe\xaf" => "\xe1\xbe\xa7", + "\xe1\xbe\xb8" => "\xe1\xbe\xb0", + "\xe1\xbe\xb9" => "\xe1\xbe\xb1", + "\xe1\xbe\xba" => "\xe1\xbd\xb0", + "\xe1\xbe\xbb" => "\xe1\xbd\xb1", + "\xe1\xbe\xbc" => "\xe1\xbe\xb3", + "\xe1\xbf\x88" => "\xe1\xbd\xb2", + "\xe1\xbf\x89" => "\xe1\xbd\xb3", + "\xe1\xbf\x8a" => "\xe1\xbd\xb4", + "\xe1\xbf\x8b" => "\xe1\xbd\xb5", + "\xe1\xbf\x8c" => "\xe1\xbf\x83", + "\xe1\xbf\x98" => "\xe1\xbf\x90", + "\xe1\xbf\x99" => "\xe1\xbf\x91", + "\xe1\xbf\x9a" => "\xe1\xbd\xb6", + "\xe1\xbf\x9b" => "\xe1\xbd\xb7", + "\xe1\xbf\xa9" => "\xe1\xbf\xa1", + "\xe1\xbf\xaa" => "\xe1\xbd\xba", + "\xe1\xbf\xab" => "\xe1\xbd\xbb", + "\xe1\xbf\xac" => 
"\xe1\xbf\xa5", + "\xe1\xbf\xb8" => "\xe1\xbd\xb8", + "\xe1\xbf\xb9" => "\xe1\xbd\xb9", + "\xe1\xbf\xba" => "\xe1\xbd\xbc", + "\xe1\xbf\xbb" => "\xe1\xbd\xbd", + "\xe1\xbf\xbc" => "\xe1\xbf\xb3", + "\xef\xbc\xa1" => "\xef\xbd\x81", + "\xef\xbc\xa2" => "\xef\xbd\x82", + "\xef\xbc\xa3" => "\xef\xbd\x83", + "\xef\xbc\xa4" => "\xef\xbd\x84", + "\xef\xbc\xa5" => "\xef\xbd\x85", + "\xef\xbc\xa6" => "\xef\xbd\x86", + "\xef\xbc\xa7" => "\xef\xbd\x87", + "\xef\xbc\xa8" => "\xef\xbd\x88", + "\xef\xbc\xa9" => "\xef\xbd\x89", + "\xef\xbc\xaa" => "\xef\xbd\x8a", + "\xef\xbc\xab" => "\xef\xbd\x8b", + "\xef\xbc\xac" => "\xef\xbd\x8c", + "\xef\xbc\xad" => "\xef\xbd\x8d", + "\xef\xbc\xae" => "\xef\xbd\x8e", + "\xef\xbc\xaf" => "\xef\xbd\x8f", + "\xef\xbc\xb0" => "\xef\xbd\x90", + "\xef\xbc\xb1" => "\xef\xbd\x91", + "\xef\xbc\xb2" => "\xef\xbd\x92", + "\xef\xbc\xb3" => "\xef\xbd\x93", + "\xef\xbc\xb4" => "\xef\xbd\x94", + "\xef\xbc\xb5" => "\xef\xbd\x95", + "\xef\xbc\xb6" => "\xef\xbd\x96", + "\xef\xbc\xb7" => "\xef\xbd\x97", + "\xef\xbc\xb8" => "\xef\xbd\x98", + "\xef\xbc\xb9" => "\xef\xbd\x99", + "\xef\xbc\xba" => "\xef\xbd\x9a", + ); + + #Unicode Character Database 6.0.0 (2010-06-04) + #autogenerated by unicode_blocks_txt2php() PHP function at 2011-06-04 00:19:39, 209 blocks total + public static $unicode_blocks = array( + 'Basic Latin' => array( + 0 => 0x0000, + 1 => 0x007F, + 2 => 0, + ), + 'Latin-1 Supplement' => array( + 0 => 0x0080, + 1 => 0x00FF, + 2 => 1, + ), + 'Latin Extended-A' => array( + 0 => 0x0100, + 1 => 0x017F, + 2 => 2, + ), + 'Latin Extended-B' => array( + 0 => 0x0180, + 1 => 0x024F, + 2 => 3, + ), + 'IPA Extensions' => array( + 0 => 0x0250, + 1 => 0x02AF, + 2 => 4, + ), + 'Spacing Modifier Letters' => array( + 0 => 0x02B0, + 1 => 0x02FF, + 2 => 5, + ), + 'Combining Diacritical Marks' => array( + 0 => 0x0300, + 1 => 0x036F, + 2 => 6, + ), + 'Greek and Coptic' => array( + 0 => 0x0370, + 1 => 0x03FF, + 2 => 7, + ), + 'Cyrillic' => array( + 0 => 0x0400, + 1 => 
0x04FF, + 2 => 8, + ), + 'Cyrillic Supplement' => array( + 0 => 0x0500, + 1 => 0x052F, + 2 => 9, + ), + 'Armenian' => array( + 0 => 0x0530, + 1 => 0x058F, + 2 => 10, + ), + 'Hebrew' => array( + 0 => 0x0590, + 1 => 0x05FF, + 2 => 11, + ), + 'Arabic' => array( + 0 => 0x0600, + 1 => 0x06FF, + 2 => 12, + ), + 'Syriac' => array( + 0 => 0x0700, + 1 => 0x074F, + 2 => 13, + ), + 'Arabic Supplement' => array( + 0 => 0x0750, + 1 => 0x077F, + 2 => 14, + ), + 'Thaana' => array( + 0 => 0x0780, + 1 => 0x07BF, + 2 => 15, + ), + 'NKo' => array( + 0 => 0x07C0, + 1 => 0x07FF, + 2 => 16, + ), + 'Samaritan' => array( + 0 => 0x0800, + 1 => 0x083F, + 2 => 17, + ), + 'Mandaic' => array( + 0 => 0x0840, + 1 => 0x085F, + 2 => 18, + ), + 'Devanagari' => array( + 0 => 0x0900, + 1 => 0x097F, + 2 => 19, + ), + 'Bengali' => array( + 0 => 0x0980, + 1 => 0x09FF, + 2 => 20, + ), + 'Gurmukhi' => array( + 0 => 0x0A00, + 1 => 0x0A7F, + 2 => 21, + ), + 'Gujarati' => array( + 0 => 0x0A80, + 1 => 0x0AFF, + 2 => 22, + ), + 'Oriya' => array( + 0 => 0x0B00, + 1 => 0x0B7F, + 2 => 23, + ), + 'Tamil' => array( + 0 => 0x0B80, + 1 => 0x0BFF, + 2 => 24, + ), + 'Telugu' => array( + 0 => 0x0C00, + 1 => 0x0C7F, + 2 => 25, + ), + 'Kannada' => array( + 0 => 0x0C80, + 1 => 0x0CFF, + 2 => 26, + ), + 'Malayalam' => array( + 0 => 0x0D00, + 1 => 0x0D7F, + 2 => 27, + ), + 'Sinhala' => array( + 0 => 0x0D80, + 1 => 0x0DFF, + 2 => 28, + ), + 'Thai' => array( + 0 => 0x0E00, + 1 => 0x0E7F, + 2 => 29, + ), + 'Lao' => array( + 0 => 0x0E80, + 1 => 0x0EFF, + 2 => 30, + ), + 'Tibetan' => array( + 0 => 0x0F00, + 1 => 0x0FFF, + 2 => 31, + ), + 'Myanmar' => array( + 0 => 0x1000, + 1 => 0x109F, + 2 => 32, + ), + 'Georgian' => array( + 0 => 0x10A0, + 1 => 0x10FF, + 2 => 33, + ), + 'Hangul Jamo' => array( + 0 => 0x1100, + 1 => 0x11FF, + 2 => 34, + ), + 'Ethiopic' => array( + 0 => 0x1200, + 1 => 0x137F, + 2 => 35, + ), + 'Ethiopic Supplement' => array( + 0 => 0x1380, + 1 => 0x139F, + 2 => 36, + ), + 'Cherokee' => array( + 0 => 0x13A0, + 1 
=> 0x13FF, + 2 => 37, + ), + 'Unified Canadian Aboriginal Syllabics' => array( + 0 => 0x1400, + 1 => 0x167F, + 2 => 38, + ), + 'Ogham' => array( + 0 => 0x1680, + 1 => 0x169F, + 2 => 39, + ), + 'Runic' => array( + 0 => 0x16A0, + 1 => 0x16FF, + 2 => 40, + ), + 'Tagalog' => array( + 0 => 0x1700, + 1 => 0x171F, + 2 => 41, + ), + 'Hanunoo' => array( + 0 => 0x1720, + 1 => 0x173F, + 2 => 42, + ), + 'Buhid' => array( + 0 => 0x1740, + 1 => 0x175F, + 2 => 43, + ), + 'Tagbanwa' => array( + 0 => 0x1760, + 1 => 0x177F, + 2 => 44, + ), + 'Khmer' => array( + 0 => 0x1780, + 1 => 0x17FF, + 2 => 45, + ), + 'Mongolian' => array( + 0 => 0x1800, + 1 => 0x18AF, + 2 => 46, + ), + 'Unified Canadian Aboriginal Syllabics Extended' => array( + 0 => 0x18B0, + 1 => 0x18FF, + 2 => 47, + ), + 'Limbu' => array( + 0 => 0x1900, + 1 => 0x194F, + 2 => 48, + ), + 'Tai Le' => array( + 0 => 0x1950, + 1 => 0x197F, + 2 => 49, + ), + 'New Tai Lue' => array( + 0 => 0x1980, + 1 => 0x19DF, + 2 => 50, + ), + 'Khmer Symbols' => array( + 0 => 0x19E0, + 1 => 0x19FF, + 2 => 51, + ), + 'Buginese' => array( + 0 => 0x1A00, + 1 => 0x1A1F, + 2 => 52, + ), + 'Tai Tham' => array( + 0 => 0x1A20, + 1 => 0x1AAF, + 2 => 53, + ), + 'Balinese' => array( + 0 => 0x1B00, + 1 => 0x1B7F, + 2 => 54, + ), + 'Sundanese' => array( + 0 => 0x1B80, + 1 => 0x1BBF, + 2 => 55, + ), + 'Batak' => array( + 0 => 0x1BC0, + 1 => 0x1BFF, + 2 => 56, + ), + 'Lepcha' => array( + 0 => 0x1C00, + 1 => 0x1C4F, + 2 => 57, + ), + 'Ol Chiki' => array( + 0 => 0x1C50, + 1 => 0x1C7F, + 2 => 58, + ), + 'Vedic Extensions' => array( + 0 => 0x1CD0, + 1 => 0x1CFF, + 2 => 59, + ), + 'Phonetic Extensions' => array( + 0 => 0x1D00, + 1 => 0x1D7F, + 2 => 60, + ), + 'Phonetic Extensions Supplement' => array( + 0 => 0x1D80, + 1 => 0x1DBF, + 2 => 61, + ), + 'Combining Diacritical Marks Supplement' => array( + 0 => 0x1DC0, + 1 => 0x1DFF, + 2 => 62, + ), + 'Latin Extended Additional' => array( + 0 => 0x1E00, + 1 => 0x1EFF, + 2 => 63, + ), + 'Greek Extended' => array( + 0 => 
0x1F00, + 1 => 0x1FFF, + 2 => 64, + ), + 'General Punctuation' => array( + 0 => 0x2000, + 1 => 0x206F, + 2 => 65, + ), + 'Superscripts and Subscripts' => array( + 0 => 0x2070, + 1 => 0x209F, + 2 => 66, + ), + 'Currency Symbols' => array( + 0 => 0x20A0, + 1 => 0x20CF, + 2 => 67, + ), + 'Combining Diacritical Marks for Symbols' => array( + 0 => 0x20D0, + 1 => 0x20FF, + 2 => 68, + ), + 'Letterlike Symbols' => array( + 0 => 0x2100, + 1 => 0x214F, + 2 => 69, + ), + 'Number Forms' => array( + 0 => 0x2150, + 1 => 0x218F, + 2 => 70, + ), + 'Arrows' => array( + 0 => 0x2190, + 1 => 0x21FF, + 2 => 71, + ), + 'Mathematical Operators' => array( + 0 => 0x2200, + 1 => 0x22FF, + 2 => 72, + ), + 'Miscellaneous Technical' => array( + 0 => 0x2300, + 1 => 0x23FF, + 2 => 73, + ), + 'Control Pictures' => array( + 0 => 0x2400, + 1 => 0x243F, + 2 => 74, + ), + 'Optical Character Recognition' => array( + 0 => 0x2440, + 1 => 0x245F, + 2 => 75, + ), + 'Enclosed Alphanumerics' => array( + 0 => 0x2460, + 1 => 0x24FF, + 2 => 76, + ), + 'Box Drawing' => array( + 0 => 0x2500, + 1 => 0x257F, + 2 => 77, + ), + 'Block Elements' => array( + 0 => 0x2580, + 1 => 0x259F, + 2 => 78, + ), + 'Geometric Shapes' => array( + 0 => 0x25A0, + 1 => 0x25FF, + 2 => 79, + ), + 'Miscellaneous Symbols' => array( + 0 => 0x2600, + 1 => 0x26FF, + 2 => 80, + ), + 'Dingbats' => array( + 0 => 0x2700, + 1 => 0x27BF, + 2 => 81, + ), + 'Miscellaneous Mathematical Symbols-A' => array( + 0 => 0x27C0, + 1 => 0x27EF, + 2 => 82, + ), + 'Supplemental Arrows-A' => array( + 0 => 0x27F0, + 1 => 0x27FF, + 2 => 83, + ), + 'Braille Patterns' => array( + 0 => 0x2800, + 1 => 0x28FF, + 2 => 84, + ), + 'Supplemental Arrows-B' => array( + 0 => 0x2900, + 1 => 0x297F, + 2 => 85, + ), + 'Miscellaneous Mathematical Symbols-B' => array( + 0 => 0x2980, + 1 => 0x29FF, + 2 => 86, + ), + 'Supplemental Mathematical Operators' => array( + 0 => 0x2A00, + 1 => 0x2AFF, + 2 => 87, + ), + 'Miscellaneous Symbols and Arrows' => array( + 0 => 0x2B00, + 1 => 
0x2BFF, + 2 => 88, + ), + 'Glagolitic' => array( + 0 => 0x2C00, + 1 => 0x2C5F, + 2 => 89, + ), + 'Latin Extended-C' => array( + 0 => 0x2C60, + 1 => 0x2C7F, + 2 => 90, + ), + 'Coptic' => array( + 0 => 0x2C80, + 1 => 0x2CFF, + 2 => 91, + ), + 'Georgian Supplement' => array( + 0 => 0x2D00, + 1 => 0x2D2F, + 2 => 92, + ), + 'Tifinagh' => array( + 0 => 0x2D30, + 1 => 0x2D7F, + 2 => 93, + ), + 'Ethiopic Extended' => array( + 0 => 0x2D80, + 1 => 0x2DDF, + 2 => 94, + ), + 'Cyrillic Extended-A' => array( + 0 => 0x2DE0, + 1 => 0x2DFF, + 2 => 95, + ), + 'Supplemental Punctuation' => array( + 0 => 0x2E00, + 1 => 0x2E7F, + 2 => 96, + ), + 'CJK Radicals Supplement' => array( + 0 => 0x2E80, + 1 => 0x2EFF, + 2 => 97, + ), + 'Kangxi Radicals' => array( + 0 => 0x2F00, + 1 => 0x2FDF, + 2 => 98, + ), + 'Ideographic Description Characters' => array( + 0 => 0x2FF0, + 1 => 0x2FFF, + 2 => 99, + ), + 'CJK Symbols and Punctuation' => array( + 0 => 0x3000, + 1 => 0x303F, + 2 => 100, + ), + 'Hiragana' => array( + 0 => 0x3040, + 1 => 0x309F, + 2 => 101, + ), + 'Katakana' => array( + 0 => 0x30A0, + 1 => 0x30FF, + 2 => 102, + ), + 'Bopomofo' => array( + 0 => 0x3100, + 1 => 0x312F, + 2 => 103, + ), + 'Hangul Compatibility Jamo' => array( + 0 => 0x3130, + 1 => 0x318F, + 2 => 104, + ), + 'Kanbun' => array( + 0 => 0x3190, + 1 => 0x319F, + 2 => 105, + ), + 'Bopomofo Extended' => array( + 0 => 0x31A0, + 1 => 0x31BF, + 2 => 106, + ), + 'CJK Strokes' => array( + 0 => 0x31C0, + 1 => 0x31EF, + 2 => 107, + ), + 'Katakana Phonetic Extensions' => array( + 0 => 0x31F0, + 1 => 0x31FF, + 2 => 108, + ), + 'Enclosed CJK Letters and Months' => array( + 0 => 0x3200, + 1 => 0x32FF, + 2 => 109, + ), + 'CJK Compatibility' => array( + 0 => 0x3300, + 1 => 0x33FF, + 2 => 110, + ), + 'CJK Unified Ideographs Extension A' => array( + 0 => 0x3400, + 1 => 0x4DBF, + 2 => 111, + ), + 'Yijing Hexagram Symbols' => array( + 0 => 0x4DC0, + 1 => 0x4DFF, + 2 => 112, + ), + 'CJK Unified Ideographs' => array( + 0 => 0x4E00, + 1 => 
0x9FFF, + 2 => 113, + ), + 'Yi Syllables' => array( + 0 => 0xA000, + 1 => 0xA48F, + 2 => 114, + ), + 'Yi Radicals' => array( + 0 => 0xA490, + 1 => 0xA4CF, + 2 => 115, + ), + 'Lisu' => array( + 0 => 0xA4D0, + 1 => 0xA4FF, + 2 => 116, + ), + 'Vai' => array( + 0 => 0xA500, + 1 => 0xA63F, + 2 => 117, + ), + 'Cyrillic Extended-B' => array( + 0 => 0xA640, + 1 => 0xA69F, + 2 => 118, + ), + 'Bamum' => array( + 0 => 0xA6A0, + 1 => 0xA6FF, + 2 => 119, + ), + 'Modifier Tone Letters' => array( + 0 => 0xA700, + 1 => 0xA71F, + 2 => 120, + ), + 'Latin Extended-D' => array( + 0 => 0xA720, + 1 => 0xA7FF, + 2 => 121, + ), + 'Syloti Nagri' => array( + 0 => 0xA800, + 1 => 0xA82F, + 2 => 122, + ), + 'Common Indic Number Forms' => array( + 0 => 0xA830, + 1 => 0xA83F, + 2 => 123, + ), + 'Phags-pa' => array( + 0 => 0xA840, + 1 => 0xA87F, + 2 => 124, + ), + 'Saurashtra' => array( + 0 => 0xA880, + 1 => 0xA8DF, + 2 => 125, + ), + 'Devanagari Extended' => array( + 0 => 0xA8E0, + 1 => 0xA8FF, + 2 => 126, + ), + 'Kayah Li' => array( + 0 => 0xA900, + 1 => 0xA92F, + 2 => 127, + ), + 'Rejang' => array( + 0 => 0xA930, + 1 => 0xA95F, + 2 => 128, + ), + 'Hangul Jamo Extended-A' => array( + 0 => 0xA960, + 1 => 0xA97F, + 2 => 129, + ), + 'Javanese' => array( + 0 => 0xA980, + 1 => 0xA9DF, + 2 => 130, + ), + 'Cham' => array( + 0 => 0xAA00, + 1 => 0xAA5F, + 2 => 131, + ), + 'Myanmar Extended-A' => array( + 0 => 0xAA60, + 1 => 0xAA7F, + 2 => 132, + ), + 'Tai Viet' => array( + 0 => 0xAA80, + 1 => 0xAADF, + 2 => 133, + ), + 'Ethiopic Extended-A' => array( + 0 => 0xAB00, + 1 => 0xAB2F, + 2 => 134, + ), + 'Meetei Mayek' => array( + 0 => 0xABC0, + 1 => 0xABFF, + 2 => 135, + ), + 'Hangul Syllables' => array( + 0 => 0xAC00, + 1 => 0xD7AF, + 2 => 136, + ), + 'Hangul Jamo Extended-B' => array( + 0 => 0xD7B0, + 1 => 0xD7FF, + 2 => 137, + ), + 'High Surrogates' => array( + 0 => 0xD800, + 1 => 0xDB7F, + 2 => 138, + ), + 'High Private Use Surrogates' => array( + 0 => 0xDB80, + 1 => 0xDBFF, + 2 => 139, + ), + 'Low 
Surrogates' => array( + 0 => 0xDC00, + 1 => 0xDFFF, + 2 => 140, + ), + 'Private Use Area' => array( + 0 => 0xE000, + 1 => 0xF8FF, + 2 => 141, + ), + 'CJK Compatibility Ideographs' => array( + 0 => 0xF900, + 1 => 0xFAFF, + 2 => 142, + ), + 'Alphabetic Presentation Forms' => array( + 0 => 0xFB00, + 1 => 0xFB4F, + 2 => 143, + ), + 'Arabic Presentation Forms-A' => array( + 0 => 0xFB50, + 1 => 0xFDFF, + 2 => 144, + ), + 'Variation Selectors' => array( + 0 => 0xFE00, + 1 => 0xFE0F, + 2 => 145, + ), + 'Vertical Forms' => array( + 0 => 0xFE10, + 1 => 0xFE1F, + 2 => 146, + ), + 'Combining Half Marks' => array( + 0 => 0xFE20, + 1 => 0xFE2F, + 2 => 147, + ), + 'CJK Compatibility Forms' => array( + 0 => 0xFE30, + 1 => 0xFE4F, + 2 => 148, + ), + 'Small Form Variants' => array( + 0 => 0xFE50, + 1 => 0xFE6F, + 2 => 149, + ), + 'Arabic Presentation Forms-B' => array( + 0 => 0xFE70, + 1 => 0xFEFF, + 2 => 150, + ), + 'Halfwidth and Fullwidth Forms' => array( + 0 => 0xFF00, + 1 => 0xFFEF, + 2 => 151, + ), + 'Specials' => array( + 0 => 0xFFF0, + 1 => 0xFFFF, + 2 => 152, + ), + 'Linear B Syllabary' => array( + 0 => 0x10000, + 1 => 0x1007F, + 2 => 153, + ), + 'Linear B Ideograms' => array( + 0 => 0x10080, + 1 => 0x100FF, + 2 => 154, + ), + 'Aegean Numbers' => array( + 0 => 0x10100, + 1 => 0x1013F, + 2 => 155, + ), + 'Ancient Greek Numbers' => array( + 0 => 0x10140, + 1 => 0x1018F, + 2 => 156, + ), + 'Ancient Symbols' => array( + 0 => 0x10190, + 1 => 0x101CF, + 2 => 157, + ), + 'Phaistos Disc' => array( + 0 => 0x101D0, + 1 => 0x101FF, + 2 => 158, + ), + 'Lycian' => array( + 0 => 0x10280, + 1 => 0x1029F, + 2 => 159, + ), + 'Carian' => array( + 0 => 0x102A0, + 1 => 0x102DF, + 2 => 160, + ), + 'Old Italic' => array( + 0 => 0x10300, + 1 => 0x1032F, + 2 => 161, + ), + 'Gothic' => array( + 0 => 0x10330, + 1 => 0x1034F, + 2 => 162, + ), + 'Ugaritic' => array( + 0 => 0x10380, + 1 => 0x1039F, + 2 => 163, + ), + 'Old Persian' => array( + 0 => 0x103A0, + 1 => 0x103DF, + 2 => 164, + ), + 'Deseret' 
=> array( + 0 => 0x10400, + 1 => 0x1044F, + 2 => 165, + ), + 'Shavian' => array( + 0 => 0x10450, + 1 => 0x1047F, + 2 => 166, + ), + 'Osmanya' => array( + 0 => 0x10480, + 1 => 0x104AF, + 2 => 167, + ), + 'Cypriot Syllabary' => array( + 0 => 0x10800, + 1 => 0x1083F, + 2 => 168, + ), + 'Imperial Aramaic' => array( + 0 => 0x10840, + 1 => 0x1085F, + 2 => 169, + ), + 'Phoenician' => array( + 0 => 0x10900, + 1 => 0x1091F, + 2 => 170, + ), + 'Lydian' => array( + 0 => 0x10920, + 1 => 0x1093F, + 2 => 171, + ), + 'Kharoshthi' => array( + 0 => 0x10A00, + 1 => 0x10A5F, + 2 => 172, + ), + 'Old South Arabian' => array( + 0 => 0x10A60, + 1 => 0x10A7F, + 2 => 173, + ), + 'Avestan' => array( + 0 => 0x10B00, + 1 => 0x10B3F, + 2 => 174, + ), + 'Inscriptional Parthian' => array( + 0 => 0x10B40, + 1 => 0x10B5F, + 2 => 175, + ), + 'Inscriptional Pahlavi' => array( + 0 => 0x10B60, + 1 => 0x10B7F, + 2 => 176, + ), + 'Old Turkic' => array( + 0 => 0x10C00, + 1 => 0x10C4F, + 2 => 177, + ), + 'Rumi Numeral Symbols' => array( + 0 => 0x10E60, + 1 => 0x10E7F, + 2 => 178, + ), + 'Brahmi' => array( + 0 => 0x11000, + 1 => 0x1107F, + 2 => 179, + ), + 'Kaithi' => array( + 0 => 0x11080, + 1 => 0x110CF, + 2 => 180, + ), + 'Cuneiform' => array( + 0 => 0x12000, + 1 => 0x123FF, + 2 => 181, + ), + 'Cuneiform Numbers and Punctuation' => array( + 0 => 0x12400, + 1 => 0x1247F, + 2 => 182, + ), + 'Egyptian Hieroglyphs' => array( + 0 => 0x13000, + 1 => 0x1342F, + 2 => 183, + ), + 'Bamum Supplement' => array( + 0 => 0x16800, + 1 => 0x16A3F, + 2 => 184, + ), + 'Kana Supplement' => array( + 0 => 0x1B000, + 1 => 0x1B0FF, + 2 => 185, + ), + 'Byzantine Musical Symbols' => array( + 0 => 0x1D000, + 1 => 0x1D0FF, + 2 => 186, + ), + 'Musical Symbols' => array( + 0 => 0x1D100, + 1 => 0x1D1FF, + 2 => 187, + ), + 'Ancient Greek Musical Notation' => array( + 0 => 0x1D200, + 1 => 0x1D24F, + 2 => 188, + ), + 'Tai Xuan Jing Symbols' => array( + 0 => 0x1D300, + 1 => 0x1D35F, + 2 => 189, + ), + 'Counting Rod Numerals' => array( + 
0 => 0x1D360, + 1 => 0x1D37F, + 2 => 190, + ), + 'Mathematical Alphanumeric Symbols' => array( + 0 => 0x1D400, + 1 => 0x1D7FF, + 2 => 191, + ), + 'Mahjong Tiles' => array( + 0 => 0x1F000, + 1 => 0x1F02F, + 2 => 192, + ), + 'Domino Tiles' => array( + 0 => 0x1F030, + 1 => 0x1F09F, + 2 => 193, + ), + 'Playing Cards' => array( + 0 => 0x1F0A0, + 1 => 0x1F0FF, + 2 => 194, + ), + 'Enclosed Alphanumeric Supplement' => array( + 0 => 0x1F100, + 1 => 0x1F1FF, + 2 => 195, + ), + 'Enclosed Ideographic Supplement' => array( + 0 => 0x1F200, + 1 => 0x1F2FF, + 2 => 196, + ), + 'Miscellaneous Symbols And Pictographs' => array( + 0 => 0x1F300, + 1 => 0x1F5FF, + 2 => 197, + ), + 'Emoticons' => array( + 0 => 0x1F600, + 1 => 0x1F64F, + 2 => 198, + ), + 'Transport And Map Symbols' => array( + 0 => 0x1F680, + 1 => 0x1F6FF, + 2 => 199, + ), + 'Alchemical Symbols' => array( + 0 => 0x1F700, + 1 => 0x1F77F, + 2 => 200, + ), + 'CJK Unified Ideographs Extension B' => array( + 0 => 0x20000, + 1 => 0x2A6DF, + 2 => 201, + ), + 'CJK Unified Ideographs Extension C' => array( + 0 => 0x2A700, + 1 => 0x2B73F, + 2 => 202, + ), + 'CJK Unified Ideographs Extension D' => array( + 0 => 0x2B740, + 1 => 0x2B81F, + 2 => 203, + ), + 'CJK Compatibility Ideographs Supplement' => array( + 0 => 0x2F800, + 1 => 0x2FA1F, + 2 => 204, + ), + 'Tags' => array( + 0 => 0xE0000, + 1 => 0xE007F, + 2 => 205, + ), + 'Variation Selectors Supplement' => array( + 0 => 0xE0100, + 1 => 0xE01EF, + 2 => 206, + ), + 'Supplementary Private Use Area-A' => array( + 0 => 0xF0000, + 1 => 0xFFFFF, + 2 => 207, + ), + 'Supplementary Private Use Area-B' => array( + 0 => 0x100000, + 1 => 0x10FFFF, + 2 => 208, + ), + ); + + #calling the methods of this class only statically! 
+ private function __construct() {} + + /** + * Remove combining diactrical marks, with possibility of the restore + * Удаляет диакритические знаки в тексте, с возможностью восстановления (опция) + * + * @param string|null $s + * @param array|null $additional_chars for example: "\xc2\xad" #soft hyphen = discretionary hyphen + * @param bool $is_can_restored + * @param array|null &$restore_table + * @return string|bool|null Returns FALSE if error occurred + */ + public static function diactrical_remove($s, $additional_chars = null, $is_can_restored = false, &$restore_table = null) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s)) return $s; + + if ($additional_chars) + { + foreach ($additional_chars as $k => &$v) $v = preg_quote($v, '/'); + $re = '/((?>' . self::$diactrical_re . '|' . implode('|', $additional_chars) . ')+)/sxSX'; + } + else $re = '/((?>' . self::$diactrical_re . ')+)/sxSX'; + if (! $is_can_restored) return preg_replace($re, '', $s); + + $restore_table = array(); + $a = preg_split($re, $s, -1, PREG_SPLIT_DELIM_CAPTURE); + $c = count($a); + if ($c === 1) return $s; + $pos = 0; + $s2 = ''; + for ($i = 0; $i < $c - 1; $i += 2) + { + $s2 .= $a[$i]; + #запоминаем символьные (не байтовые!) позиции + $pos += self::strlen($a[$i]); + $restore_table['offsets'][$pos] = $a[$i + 1]; + } + $restore_table['length'] = $pos + self::strlen(end($a)); + return $s2 . end($a); + } + + /** + * Restore combining diactrical marks, removed by self::diactrical_remove() + * In Russian: + * Восстанавливает диакритические знаки в тексте, при условии, что их символьные позиции и кол-во символов не изменились! + * + * @see self::diactrical_remove() + * @param string|null $s + * @param array $restore_table + * @return string|bool|null Returns FALSE if error occurred (broken $restore_table) + */ + public static function diactrical_restore($s, array $restore_table) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s)) return $s; + + if (! 
$restore_table) return $s; + if (! is_int(@$restore_table['length']) || + ! is_array(@$restore_table['offsets']) || + $restore_table['length'] !== self::strlen($s)) return false; + $a = array(); + $length = $offset = 0; + $s2 = ''; + foreach ($restore_table['offsets'] as $pos => $diactricals) + { + $length = $pos - $offset; + $s2 .= self::substr($s, $offset, $length) . $diactricals; + $offset = $pos; + } + return $s2 . self::substr($s, $offset, strlen($s)); + } + + /** + * Encodes data from another character encoding to UTF-8. + * + * @param array|scalar|null $data + * @param string $charset + * @return array|scalar|null Returns FALSE if error occurred + */ + public static function convert_from($data, $charset = 'cp1251') + { + if (! ReflectionTypeHint::isValid()) return false; + return self::_convert($data, $charset, 'UTF-8'); + } + + /** + * Encodes data from UTF-8 to another character encoding. + * + * @param array|scalar|null $data + * @param string $charset + * @return array|scalar|null Returns FALSE if error occurred + */ + public static function convert_to($data, $charset = 'cp1251') + { + if (! ReflectionTypeHint::isValid()) return false; + return self::_convert($data, 'UTF-8', $charset); + } + + /** + * Recoding the data of any structure to/from UTF-8. + * Arrays traversed recursively, recoded keys and values. + * + * @see mb_encoding_aliases() + * @param array|scalar|null $data + * @param string $charset_from + * @param string $charset_to + * @return array|scalar|null Returns FALSE if error occurred + */ + private static function _convert($data, $charset_from, $charset_to) + { + if (! ReflectionTypeHint::isValid()) return false; #for recursive calls + if ($charset_from === $charset_to) return $data; + if (is_array($data)) + { + $d = array(); + foreach ($data as $k => &$v) + { + $k = self::_convert($k, $charset_from, $charset_to); + if ($k === false) return false; + $d[$k] = self::_convert($v, $charset_from, $charset_to); + if ($d[$k] === false && ! 
is_bool($v)) return false; + } + return $d; + } + if (is_string($data)) + { + #smart behaviour for errors protected + speed improve + if ($charset_from === 'UTF-8' && ! self::is_utf8($data)) return $data; + if ($charset_to === 'UTF-8' && self::is_utf8($data)) return $data; + + #since PHP-5.3.x iconv() faster then mb_convert_encoding() + if (function_exists('iconv')) return iconv($charset_from, $charset_to . '//IGNORE//TRANSLIT', $data); + if (function_exists('mb_convert_encoding')) return mb_convert_encoding($data, $charset_to, $charset_from); + + #charset_from + if ($charset_from === 'UTF-16' || $charset_from === 'UCS-2') return self::_convert_from_utf16($data); + if ($charset_from === 'cp1251' || $charset_from === 'cp1259') return strtr($data, self::$cp1259_table); + if ($charset_from === 'koi8-r' || $charset_from === 'KOI8-R') return strtr(convert_cyr_string($data, 'k', 'w'), self::$cp1259_table); + if ($charset_from === 'iso8859-5') return strtr(convert_cyr_string($data, 'i', 'w'), self::$cp1259_table); + if ($charset_from === 'cp866') return strtr(convert_cyr_string($data, 'a', 'w'), self::$cp1259_table); + if ($charset_from === 'mac-cyrillic') return strtr(convert_cyr_string($data, 'm', 'w'), self::$cp1259_table); + + #charset_to + if ($charset_to === 'cp1251' || $charset_to === 'cp1259') return strtr($data, array_flip(self::$cp1259_table)); + + #last trying + if (function_exists('recode_string')) + { + $s = @recode_string($charset_from . '..' . $charset_to, $data); + if (is_string($s)) return $s; + } + + trigger_error('Convert "' . $charset_from . '" --> "' . $charset_to . '" is not supported native, "iconv" or "mbstring" extension required', E_USER_WARNING); + return false; + } + return $data; + } + + /** + * Convert UTF-16 / UCS-2 encoding string to UTF-8. + * Surrogates UTF-16 are supported! + * + * In Russian: + * Преобразует строку из кодировки UTF-16 / UCS-2 в UTF-8. + * Суррогаты UTF-16 поддерживаются! 
+ * + * @param string $s + * @param string $type 'BE' -- big endian byte order + * 'LE' -- little endian byte order + * @param bool $to_array returns array chars instead whole string? + * @return string|array|bool UTF-8 string, array chars or FALSE if error occurred + */ + private static function _convert_from_utf16($s, $type = 'BE', $to_array = false) + { + static $types = array( + 'BE' => 'n', #unsigned short (always 16 bit, big endian byte order) + 'LE' => 'v', #unsigned short (always 16 bit, little endian byte order) + ); + if (! array_key_exists($type, $types)) + { + trigger_error('Unexpected value in 2-nd parameter, "' . $type . '" given!', E_USER_WARNING); + return false; + } + #the fastest way: + if (function_exists('iconv') || function_exists('mb_convert_encoding')) + { + if (function_exists('iconv')) $s = iconv('UTF-16' . $type, 'UTF-8', $s); + elseif (function_exists('mb_convert_encoding')) $s = mb_convert_encoding($s, 'UTF-8', 'UTF-16' . $type); + if (! $to_array) return $s; + return self::str_split($s); + } + + /* + http://en.wikipedia.org/wiki/UTF-16 + + The improvement that UTF-16 made over UCS-2 is its ability to encode + characters in planes 1-16, not just those in plane 0 (BMP). + + UTF-16 represents non-BMP characters (those from U+10000 through U+10FFFF) + using a pair of 16-bit words, known as a surrogate pair. + First 1000016 is subtracted from the code point to give a 20-bit value. + This is then split into two separate 10-bit values each of which is represented + as a surrogate with the most significant half placed in the first surrogate. + To allow safe use of simple word-oriented string processing, separate ranges + of values are used for the two surrogates: 0xD800-0xDBFF for the first, most + significant surrogate and 0xDC00-0xDFFF for the second, least significant surrogate. 
+ + For example, the character at code point U+10000 becomes the code unit sequence 0xD800 0xDC00, + and the character at U+10FFFD, the upper limit of Unicode, becomes the sequence 0xDBFF 0xDFFD. + Unicode and ISO/IEC 10646 do not, and will never, assign characters to any of the code points + in the U+D800-U+DFFF range, so an individual code value from a surrogate pair does not ever + represent a character. + + http://www.russellcottrell.com/greek/utilities/SurrogatePairCalculator.htm + http://www.russellcottrell.com/greek/utilities/UnicodeRanges.htm + + Conversion of a Unicode scalar value S to a surrogate pair : + H = Math.floor((S - 0x10000) / 0x400) + 0xD800; + L = ((S - 0x10000) % 0x400) + 0xDC00; + The conversion of a surrogate pair to a scalar value: + N = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000; + */ + $a = array(); + $hi = false; + foreach (unpack($types[$type] . '*', $s) as $codepoint) + { + #surrogate process + if ($hi !== false) + { + $lo = $codepoint; + if ($lo < 0xDC00 || $lo > 0xDFFF) $a[] = "\xEF\xBF\xBD"; #U+FFFD REPLACEMENT CHARACTER (for broken char) + else + { + $codepoint = (($hi - 0xD800) * 0x400) + ($lo - 0xDC00) + 0x10000; + $a[] = self::chr($codepoint); + } + $hi = false; + } + elseif ($codepoint < 0xD800 || $codepoint > 0xDBFF) $a[] = self::chr($codepoint); #not surrogate + else $hi = $codepoint; #surrogate was found + } + return $to_array ? $a : implode('', $a); + } + + /** + * Strips out device control codes in the ASCII range. + * + * @param string|null String to clean + * @return string|bool|null Returns FALSE if error occurred + */ + public static function strict($s) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s)) return $s; + return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]+/sSX', '', $s); + } + + /** + * Check the data accessory to the class of characters ASCII. + * For null, integer, float, boolean returns TRUE. 
+ * + * Массивы обходятся рекурсивно, если в хотябы одном элементе массива + * его значение не ASCII, возвращается FALSE. + * + * @param array|scalar|null $data + * @return bool + */ + public static function is_ascii($data) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_array($data)) + { + foreach ($data as $k => &$v) + { + if (! self::is_ascii($k) || ! self::is_ascii($v)) return false; + } + return true; + } + #ltrim() little faster then preg_match() + #if (is_string($data)) return preg_match('/^[\x00-\x7f]*$/sSX', $data); #deprecated + if (is_string($data)) return ltrim($data, "\x00..\x7f") === ''; + if (is_scalar($data) || is_null($data)) return true; #~ null, integer, float, boolean + return false; #object or resource + } + + /** + * Returns true if data is valid UTF-8 and false otherwise. + * For null, integer, float, boolean returns TRUE. + * + * The arrays are traversed recursively, if At least one element of the array + * its value is not in UTF-8, returns FALSE. + * + * @link http://www.w3.org/International/questions/qa-forms-utf-8.html + * @link http://ru3.php.net/mb_detect_encoding + * @link http://webtest.philigon.ru/articles/utf8/ + * @link http://unicode.coeurlumiere.com/ + * @param array|scalar|null $data + * @param bool $is_strict strict the range of ASCII? + * @return bool + */ + public static function is_utf8($data, $is_strict = true) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_array($data)) + { + foreach ($data as $k => &$v) + { + if (! self::is_utf8($k, $is_strict) || ! self::is_utf8($v, $is_strict)) return false; + } + return true; + } + if (is_string($data)) + { + if (! preg_match('~~suSX', $data)) return false; + if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return false; + #preg_match('~~suSX') much faster (up to 4 times), then mb_check_encoding($data, 'UTF-8')! + #if (function_exists('mb_check_encoding') && ! 
mb_check_encoding($data, 'UTF-8')) return false; #DEPRECATED + if ($is_strict && preg_match('/[^\x09\x0A\x0D\x20-\xBF\xC2-\xF7]/sSX', $data)) return false; + return true; + } + if (is_scalar($data) || is_null($data)) return true; #~ null, integer, float, boolean + return false; #object or resource + } + + /** + * Tries to detect if a string is in Unicode encoding + * + * @deprecated Slowly, use self::is_utf8() instead + * @see self::is_utf8() + * @param string $s текст + * @param bool $is_strict строгая проверка диапазона ASCII? + * @return bool + */ + public static function check($s, $is_strict = true) + { + if (! ReflectionTypeHint::isValid()) return false; + for ($i = 0, $len = strlen($s); $i < $len; $i++) + { + $c = ord($s[$i]); + if ($c < 0x80) #1 byte 0bbbbbbb + { + if ($is_strict === false || ($c > 0x1F && $c < 0x7F) || $c == 0x09 || $c == 0x0A || $c == 0x0D) continue; + } + if (($c & 0xE0) == 0xC0) $n = 1; #2 bytes 110bbbbb 10bbbbbb + elseif (($c & 0xF0) == 0xE0) $n = 2; #3 bytes 1110bbbb 10bbbbbb 10bbbbbb + elseif (($c & 0xF8) == 0xF0) $n = 3; #4 bytes 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb + elseif (($c & 0xFC) == 0xF8) $n = 4; #5 bytes 111110bb 10bbbbbb 10bbbbbb 10bbbbbb 10bbbbbb + elseif (($c & 0xFE) == 0xFC) $n = 5; #6 bytes 1111110b 10bbbbbb 10bbbbbb 10bbbbbb 10bbbbbb 10bbbbbb + else return false; #does not match any model + #n bytes matching 10bbbbbb follow ? + for ($j = 0; $j < $n; $j++) + { + $i++; + if ($i == $len || ((ord($s[$i]) & 0xC0) != 0x80) ) return false; + } + } + return true; + } + + /** + * Check the data in UTF-8 charset on given ranges of the standard UNICODE. + * The suitable alternative to regular expressions. + * + * For null, integer, float, boolean returns TRUE. + * + * Arrays traversed recursively (keys and values). + * At least if one array element value is not passed checking, it returns FALSE. 
+ * + * @example + * #A simple check the standard named ranges: + * UTF8::blocks_check('поисковые системы Google и Yandex', array('Basic Latin', 'Cyrillic')); + * #You can check the named, direct ranges or codepoints together: + * UTF8::blocks_check('поисковые системы Google и Yandex', array(array(0x20, 0x7E), #[\x20-\x7E] + * array(0x0410, 0x044F), #[A-Яa-я] + * 0x0401, #russian yo (Ё) + * 0x0451, #russian ye (ё) + * 'Arrows', + * )); + * + * @link http://www.unicode.org/charts/ + * @param array|scalar|null $data + * @param array|string $blocks + * @return bool Возвращает TRUE, если все символы из текста принадлежат указанным диапазонам + * и FALSE в противном случае или для разбитого UTF-8. + */ + public static function blocks_check($data, $blocks) + { + if (! ReflectionTypeHint::isValid()) return false; + + if (is_array($data)) + { + foreach ($data as $k => &$v) + { + if (! self::blocks_check($k, $blocks) || ! self::blocks_check($v, $blocks)) return false; + } + return true; + } + + if (is_string($data)) + { + $chars = self::str_split($data); + if ($chars === false) return false; #broken UTF-8 + unset($data); #memory free + $skip = array(); #save to cache already checked symbols + foreach ($chars as $i => $char) + { + if (array_key_exists($char, $skip)) continue; #speed improve + $codepoint = self::ord($char); + if ($codepoint === false) return false; #broken UTF-8 + $is_valid = false; + $blocks = (array)$blocks; + foreach ($blocks as $j => $block) + { + if (is_string($block)) + { + if (! array_key_exists($block, self::$unicode_blocks)) + { + trigger_error('Unknown block "' . $block . '"!', E_USER_WARNING); + return false; + } + list ($min, $max) = self::$unicode_blocks[$block]; + } + elseif (is_array($block)) list ($min, $max) = $block; + elseif (is_int($block)) $min = $max = $block; + else trigger_error('A string/array/int type expected for block[' . $j . 
']!', E_USER_ERROR); + if ($codepoint >= $min && $codepoint <= $max) + { + $is_valid = true; + break; + } + }#foreach + if (! $is_valid) return false; + $skip[$char] = null; + }#foreach + return true; + } + if (is_scalar($data) || is_null($data)) return true; #~ null, integer, float, boolean + return false; #object or resource + } + + /** + * Recode $_GET, $_POST, $_COOKIE, $_REQUEST, $_FILES from $charset encoding to UTF-8, if necessary. + * A side effect is a positive protection against XSS attacks with non-printable characters on the vulnerable PHP function. + * Thus web forms can be sent to the server in 2-encoding: $charset and UTF-8. + * For example: ?тест[тест]=тест + * + * Алгоритм работы: + * 1) Функция проверяет массивы $_GET, $_POST, $_COOKIE, $_REQUEST, $_FILES + * на корректность значений элементов кодировке UTF-8. + * 2) Значения не в UTF-8 принимаются как $charset и конвертируется в UTF-8, + * при этом байты от 0x00 до 0x7F (ASCII) сохраняются как есть. + * 3) Сконвертированные значения снова проверяются. + * Если данные опять не в кодировке UTF-8, то они считаются разбитыми и функция возвращает FALSE. + * + * NOTICE + * Функция должна вызываться после self::unescape_request()! + * + * @see self::unescape_request() + * @param bool $is_hex2bin Декодировать HEX-данные? + * Пример: 0xd09ec2a0d0bad0bed0bcd0bfd0b0d0bdd0b8d0b8 => О компании + * Параметры в URL адресах иногда бывает удобно кодировать не функцией rawurlencode(), + * а использовать следующий механизм (к тому же кодирующий данные более компактно): + * '0x' . bin2hex($string) + * @param string $charset + * @return bool Возвращает TRUE, если все значения элементов массивов в кодировке UTF-8 + * и FALSE + E_USER_WARNING в противном случае. + */ + public static function autoconvert_request($is_hex2bin = false, $charset = 'cp1251') + { + if (! 
ReflectionTypeHint::isValid()) return false; + $is_converted = false; + $is_broken = false; + foreach (array('_GET', '_POST', '_COOKIE', '_FILES') as $k => $v) + { + if (! array_key_exists($v, $GLOBALS)) continue; + #использовать array_walk_recursive() не предоставляется возможным, + #т.к. его callback функция не поддерживает передачу ключа по ссылке + $GLOBALS[$v] = self::_autoconvert_request_recursive($GLOBALS[$v], $is_converted, $is_broken, $is_hex2bin, $charset); + if ($is_broken) + { + trigger_error('Array $' . $v . ' does not have keys/values in UTF-8 charset!', E_USER_WARNING); + return false; + } + } + if ($is_converted) + { + $_REQUEST = + (isset($_COOKIE) ? $_COOKIE : array()) + + (isset($_POST) ? $_POST : array()) + + (isset($_GET) ? $_GET : array()); + } + return true; + } + + private static function _autoconvert_request_recursive(&$data, &$is_converted, &$is_broken, $is_hex2bin, $charset) + { + if ($is_broken) return $data; #speed improve + if (is_array($data)) + { + $d = array(); + foreach ($data as $k => &$v) + { + $k = self::_autoconvert_request($k, $is_converted, $is_broken, $is_hex2bin, $charset); + if ($is_broken) return $data; #speed improve + $d[$k] = self::_autoconvert_request_recursive($v, $is_converted, $is_broken, $is_hex2bin, $charset); + if ($is_broken) return $data; #speed improve + } + return $d; + } + return self::_autoconvert_request($data, $is_converted, $is_broken, $is_hex2bin, $charset); + } + + private static function _autoconvert_request(&$s, &$is_converted, &$is_broken, $is_hex2bin, $charset) + { + #regexp speed improve by using strpos() + if ($is_hex2bin && strpos($s, '0x') === 0 && preg_match('/^0x((?:[\da-fA-F]{2})+)$/sSX', $s, $m)) + { + $s = pack('H' . strlen($m[1]), $m[1]); #hex2bin() + $is_converted = true; + } + if (! self::is_utf8($s)) + { + $s = self::convert_from($s, $charset); + if ($s === false) $is_broken = true; + elseif (! self::is_utf8($s)) + { + trigger_error('String 0x ' . substr(bin2hex($s), 0, 100) . '... 
is not UTF-8!', E_USER_WARNING); + $is_broken = true; + } + else $is_converted = true; + } + return $s; + } + + /** + * Сравнение строк + * + * @param string|null $s1 + * @param string|null $s2 + * @param string $locale For example, 'en_CA', 'ru_RU' + * @return int|bool|null Returns FALSE if error occurred + * Returns < 0 if $s1 is less than $s2; + * > 0 if $s1 is greater than $s2; + * 0 if they are equal. + */ + public static function strcmp($s1, $s2, $locale = '') + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s1) || is_null($s2)) return null; + if (! function_exists('collator_create')) return strcmp($s1, $s2); + # PHP 5 >= 5.3.0, PECL intl >= 1.0.0 + # If empty string ("") or "root" are passed, UCA rules will be used. + $c = new Collator($locale); + if (! $c) + { + # Returns an "empty" object on error. You can use intl_get_error_code() and/or intl_get_error_message() to know what happened. + trigger_error(intl_get_error_message(), E_USER_WARNING); + return false; + } + return $c->compare($s1, $s2); + } + + /** + * Сравнение строк для N первых символов + * + * @param string|null $s1 + * @param string|null $s2 + * @param int $length + * @return int|bool|null Returns FALSE if error occurred + * Returns < 0 if $s1 is less than $s2; + * > 0 if $s1 is greater than $s2; + * 0 if they are equal. + */ + public static function strncmp($s1, $s2, $length) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s1) || is_null($s2)) return null; + return self::strcmp(self::substr($s1, 0, $length), self::substr($s2, 0, $length)); + } + + /** + * Implementation strcasecmp() function for UTF-8 encoding string. + * + * @param string|null $s1 + * @param string|null $s2 + * @return int|bool|null Returns FALSE if error occurred + * Returns < 0 if $s1 is less than $s2; + * > 0 if $s1 is greater than $s2; + * 0 if they are equal. + */ + public static function strcasecmp($s1, $s2) + { + if (! 
ReflectionTypeHint::isValid()) return false; + if (is_null($s1) || is_null($s2)) return null; + return self::strcmp(self::lowercase($s1), self::lowercase($s2)); + } + + /** + * Converts a UTF-8 string to a UNICODE codepoints + * + * @param string|null $s UTF-8 string + * @return array|bool|null Unicode codepoints + * Returns FALSE if $s broken (not UTF-8) + */ + public static function to_unicode($s) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s)) return $s; + + $s2 = null; + #since PHP-5.3.x iconv() little faster then mb_convert_encoding() + if (function_exists('iconv')) $s2 = @iconv('UTF-8', 'UCS-4BE', $s); + elseif (function_exists('mb_convert_encoding')) $s2 = @mb_convert_encoding($s, 'UCS-4BE', 'UTF-8'); + if (is_string($s2)) return array_values(unpack('N*', $s2)); + if ($s2 !== null) return false; + + $a = self::str_split($s); + if ($a === false) return false; + return array_map(array(__CLASS__, 'ord'), $a); + } + + /** + * Converts a UNICODE codepoints to a UTF-8 string + * + * @param array|null $a Unicode codepoints + * @return string|bool|null UTF-8 string + * Returns FALSE if error occurred + */ + public static function from_unicode($a) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($a)) return $a; + + #since PHP-5.3.x iconv() little faster then mb_convert_encoding() + if (function_exists('iconv')) + { + array_walk($a, function(&$cp) { $cp = pack('N', $cp); }); + $s = @iconv('UCS-4BE', 'UTF-8', implode('', $a)); + if (! is_string($s)) return false; + return $s; + } + if (function_exists('mb_convert_encoding')) + { + array_walk($a, function(&$cp) { $cp = pack('N', $cp); }); + $s = mb_convert_encoding(implode('', $a), 'UTF-8', 'UCS-4BE'); + if (! 
is_string($s)) return false; + return $s; + } + + return implode('', array_map(array(__CLASS__, 'chr'), $a)); + } + + /** + * Converts a single UTF-8 character (1 to 4 bytes) to its UNICODE codepoint + * + * @param string|null $char UTF-8 character + * @return int|bool|null Unicode codepoint, NULL if $char is NULL + * Returns FALSE (with E_USER_WARNING) if $char is not 1 to 4 bytes long + */ + public static function ord($char) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($char)) return $char; + + static $cache = array(); + if (array_key_exists($char, $cache)) return $cache[$char]; #speed improve: every distinct character is decoded only once + + switch (strlen($char)) + { + case 1 : return $cache[$char] = ord($char); + case 2 : return $cache[$char] = (ord($char[1]) & 63) | + ((ord($char[0]) & 31) << 6); #110bbbbb 10bbbbbb + case 3 : return $cache[$char] = (ord($char[2]) & 63) | + ((ord($char[1]) & 63) << 6) | + ((ord($char[0]) & 15) << 12); #1110bbbb 10bbbbbb 10bbbbbb + case 4 : return $cache[$char] = (ord($char[3]) & 63) | + ((ord($char[2]) & 63) << 6) | + ((ord($char[1]) & 63) << 12) | + ((ord($char[0]) & 7) << 18); #11110bbb 10bbbbbb 10bbbbbb 10bbbbbb + default : + trigger_error('Character 0x' . bin2hex($char) . ' is not UTF-8!', E_USER_WARNING); + return false; + } + } + + /** + * Converts a UNICODE codepoint to a UTF-8 character + * + * @param int|digit|null $cp Unicode codepoint + * @return string|bool|null UTF-8 character + * Returns FALSE if error occurred + */ + public static function chr($cp) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($cp)) return $cp; + + static $cache = array(); + if (array_key_exists($cp, $cache)) return $cache[$cp]; #speed improve + + if ($cp <= 0x7f) return $cache[$cp] = chr($cp); + if ($cp <= 0x7ff) return $cache[$cp] = chr(0xc0 | ($cp >> 6)) . + chr(0x80 | ($cp & 0x3f)); + if ($cp <= 0xffff) return $cache[$cp] = chr(0xe0 | ($cp >> 12)) . + chr(0x80 | (($cp >> 6) & 0x3f)) . + chr(0x80 | ($cp & 0x3f)); + if ($cp <= 0x10ffff) return $cache[$cp] = chr(0xf0 | ($cp >> 18)) . + chr(0x80 | (($cp >> 12) & 0x3f)) . + chr(0x80 | (($cp >> 6) & 0x3f)) .
+ chr(0x80 | ($cp & 0x3f)); + #U+FFFD REPLACEMENT CHARACTER + return $cache[$cp] = "\xEF\xBF\xBD"; + } + + /** + * Implementation chunk_split() function for UTF-8 encoding string. + * + * @param string|null $s + * @param int|digit|null $length + * @param string|null $glue + * @return string|bool|null Returns FALSE if error occurred + */ + public static function chunk_split($s, $length = null, $glue = null) + { + if (! ReflectionTypeHint::isValid()) return false; + if (is_null($s)) return $s; + + $length = intval($length); + $glue = strval($glue); + if ($length < 1) $length = 76; + if ($glue === '') $glue = "\r\n"; + if (! is_array($a = self::str_split($s, $length))) return false; + return implode($glue, $a); + } + + /** + * Changes all keys in an array + * + * @param array|null $a + * @param int $mode {CASE_LOWER|CASE_UPPER} + * @return array|bool|null Returns FALSE if error occurred + */ + public static function array_change_key_case($a, $mode) + { + if (! ReflectionTypeHint::isValid()) return false; + if (! is_array($a)) return $a; + $a2 = array(); + foreach ($a as $k => $v) + { + if (is_string($k)) + { + $k = self::convert_case($k, $mode); + if ($k === false) return false; + } + $a2[$k] = $v; + } + return $a2; + } + + /** + * Конвертирует регистр букв в данных в кодировке UTF-8. + * Массивы обходятся рекурсивно, при этом конвертируются только значения + * в элементах массива, а ключи остаются без изменений. + * Для конвертирования только ключей используйте метод self::array_change_key_case(). + * + * @see self::array_change_key_case() + * @link http://www.unicode.org/charts/PDF/U0400.pdf + * @link http://ru.wikipedia.org/wiki/ISO_639-1 + * @param array|scalar|null $data Данные произвольной структуры + * @param int $mode {CASE_LOWER|CASE_UPPER} + * @param bool $is_ascii_optimization for speed improve + * @return scalar|bool|null Returns FALSE if error occurred + */ + public static function convert_case($data, $mode, $is_ascii_optimization = true) + { + if (! 
ReflectionTypeHint::isValid()) return false; + + if (is_array($data)) + { + foreach ($data as $k => &$v) $v = self::convert_case($v, $mode); + return $data; + } + if (! is_string($data) || ! $data) return $data; + + if ($mode === CASE_UPPER) + { + if ($is_ascii_optimization && self::is_ascii($data)) return strtoupper($data); #speed improve! + #deprecated, since PHP-5.3.x strtr() 2-3 times faster then mb_strtolower() + #if (function_exists('mb_strtoupper')) return mb_strtoupper($data, 'utf-8'); + return strtr($data, array_flip(self::$convert_case_table)); + } + if ($mode === CASE_LOWER) + { + if ($is_ascii_optimization && self::is_ascii($data)) return strtolower($data); #speed improve! + #deprecated, since PHP-5.3.x strtr() 2-3 times faster then mb_strtolower() + #if (function_exists('mb_strtolower')) return mb_strtolower($data, 'utf-8'); + return strtr($data, self::$convert_case_table); + } + trigger_error('Parameter 2 should be a constant of CASE_LOWER or CASE_UPPER!', E_USER_WARNING); + return $data; + } + + /** + * Convert a data to lower case + * + * @param array|scalar|null $data + * @return scalar|bool|null Returns FALSE if error occurred */ + public static function lowercase($data) + { + if (! ReflectionTypeHint::isValid()) return false; + return self::convert_case($data, CASE_LOWER); + } + + /** + * Convert a data to upper case + * + * @param array|scalar|null $data + * @return scalar|null Returns FALSE if error occurred + */ + public static function uppercase($data) + { + if (! ReflectionTypeHint::isValid()) return false; + return self::convert_case($data, CASE_UPPER); + } + + /** + * Convert a data to lower case + * + * @param array|scalar|null $data + * @return scalar|bool|null Returns FALSE if error occurred + */ + public static function strtolower($data) + { + if (! 
ReflectionTypeHint::isValid()) return false; + return self::convert_case($data, CASE_LOWER); + } + + /** + * Convert a data to upper case + * + * @param array|scalar|null $data + * @return scalar|null Returns FALSE if error occurred + */ + public static function strtoupper($data) + { + if (! ReflectionTypeHint::isValid()) return false; + return self::convert_case($data, CASE_UPPER); + } + + + /** + * Convert all HTML entities to native UTF-8 characters + * Функция декодирует гораздо больше именованных сущностей, чем стандартная html_entity_decode() + * Все dec и hex сущности так же переводятся в UTF-8. + * + * Example: '"' or '"' or '"' will be converted to '"'. + * + * @link http://www.htmlhelp.com/reference/html40/entities/ + * @link http://www.alanwood.net/demos/ent4_frame.html (HTML 4.01 Character Entity References) + * @link http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset1.asp?frame=true + * @link http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp?frame=true + * @link http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp?frame=true + * + * @param scalar|null $s + * @param bool $is_special_chars Дополнительно обрабатывать специальные html сущности? (< > & ") + * @return scalar|null Returns FALSE if error occurred + */ + public static function html_entity_decode($s, $is_special_chars = false) + { + if (! ReflectionTypeHint::isValid()) return false; + if (! 
is_string($s)) return $s; + + #speed improve + if (strlen($s) < 4 #по минимальной длине сущности - 4 байта: &#d; &xx; + || ($pos = strpos($s, '&') === false) || strpos($s, ';', $pos) === false) return $s; + + $table = self::$html_entity_table; + if ($is_special_chars) $table += self::$html_special_chars_table; + + #replace named entities + $s = strtr($s, $table); + #block below deprecated, since PHP-5.3.x strtr() 1.5 times faster + if (0 && preg_match_all('/&[a-zA-Z]++\d*+;/sSX', $s, $m, null, $pos)) + { + foreach (array_unique($m[0]) as $entity) + { + if (array_key_exists($entity, $table)) $s = str_replace($entity, $table[$entity], $s); + } + } + + #заменяем числовые dec и hex сущности: + if (strpos($s, '&#') !== false) #speed improve + { + $class = __CLASS__; + $html_special_chars_table_flipped = array_flip(self::$html_special_chars_table); + $s = preg_replace_callback('/&#((x)[\da-fA-F]{1,6}+|\d{1,7}+);/sSX', + function (array $m) use ($class, $html_special_chars_table_flipped, $is_special_chars) + { + $codepoint = isset($m[2]) && $m[2] === 'x' ? hexdec($m[1]) : $m[1]; + if (! $is_special_chars) + { + $char = pack('C', $codepoint); + if (array_key_exists($char, $html_special_chars_table_flipped)) return $html_special_chars_table_flipped[$char]; + } + return $class::chr($codepoint); + }, $s); + } + return $s; + } + + /** + * Convert special UTF-8 characters to HTML entities. 
* Encodes many more named entities than the standard htmlentities().
     *
     * @link    http://www.htmlhelp.com/reference/html40/entities/
     * @link    http://www.alanwood.net/demos/ent4_frame.html (HTML 4.01 Character Entity References)
     * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset1.asp?frame=true
     * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp?frame=true
     * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp?frame=true
     *
     * @param   scalar|null  $s
     * @param   bool         $is_special_chars_only  Encode only the special HTML characters (< > & ")?
     * @return  scalar|null  Non-string input is returned unchanged; FALSE if error occurred
     */
    public static function html_entity_encode($s, $is_special_chars_only = false)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (! is_string($s)) return $s;

        #Flags and charset are passed explicitly: the default flags of htmlspecialchars()
        #changed in PHP 8.1 (single quotes became encoded) and the default charset depends
        #on the "default_charset" ini setting, while this method promises < > & " in UTF-8.
        if ($is_special_chars_only) return htmlspecialchars($s, ENT_COMPAT, 'UTF-8');

        #replace UTF-8 chars with named entities (strtr() is the fastest way since PHP-5.3.x)
        return strtr($s, array_flip(self::$html_entity_table));
    }

    /**
     * Make regular expression for case insensitive match
     * Example (non ASCII): "123_слово_test" => "123_(с|С)(л|Л)(о|О)(в|В)(о|О)_[tT][eE][sS][tT]"
     * Example (only ASCII): "123_test" => "(?i:123_test)"
     *
     * @param   string       $s
     * @param   string|null  $delimiter  If the optional delimiter is specified, it will also be escaped.
*                                       This is useful for escaping the delimiter that is required by the PCRE functions.
     *                                       The / is the most commonly used delimiter.
     * @return  string|bool|null             Returns FALSE if error occurred
     */
    public static function preg_quote_case_insensitive($s, $delimiter = null)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        if (self::is_ascii($s)) return '(?i:' . preg_quote($s, $delimiter) . ')'; #speed improve

        $s_lc = self::lowercase($s); if ($s_lc === false) return false;
        $s_uc = self::uppercase($s); if ($s_uc === false) return false;

        $chars_lc = self::str_split($s_lc); if ($chars_lc === false) return false;
        $chars_uc = self::str_split($s_uc); if ($chars_uc === false) return false;

        $s_re = '';
        foreach ($chars_lc as $i => $lc)
        {
            $uc = $chars_uc[$i];
            if ($lc === $uc)              #caseless character: quote it as is
                $s_re .= preg_quote($lc, $delimiter);
            elseif (self::is_ascii($lc))  #ASCII letter: character class, e.g. [tT]
                $s_re .= '[' . preg_quote($lc . $uc, $delimiter) . ']';
            else                          #multibyte letter: alternation, e.g. (с|С)
                $s_re .= '(' . preg_quote($lc, $delimiter) . '|'
                             . preg_quote($uc, $delimiter) . ')';
        }
        return $s_re;
    }

    /**
     * Calls preg_match_all() and converts byte offsets into character offsets
     * for the PREG_OFFSET_CAPTURE flag.
     * This works regardless of whether the /u modifier is used.
     *
     * @link    http://bolknote.ru/2010/09/08/~2704
     *
     * @param   string       $pattern
     * @param   string|null  $subject
     * @param   array        $matches
     * @param   int          $flags
     * @param   int          $char_offset  Start offset, in characters
     * @return  array|bool|null            Result of the native preg_match_all();
     *                                     NULL if $subject is NULL; FALSE if error occurred
     */
    public static function preg_match_all($pattern, $subject, &$matches, $flags = PREG_PATTERN_ORDER, $char_offset = 0)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($subject)) return null;

        #the native function expects the start offset in bytes
        $byte_offset = ($char_offset > 0) ? strlen(self::substr($subject, 0, $char_offset)) : $char_offset;

        $return = preg_match_all($pattern, $subject, $matches, $flags, $byte_offset);
        if ($return === false) return false;

        if ($flags & PREG_OFFSET_CAPTURE)
        {
            foreach ($matches as $i => $match)
            {
                foreach ($match as $j => $a)
                {
                    #an unmatched subpattern is reported with offset -1: keep it untouched,
                    #otherwise substr($subject, 0, -1) would turn it into a bogus position
                    if (! is_array($a) || $a[1] < 0) continue;
                    $matches[$i][$j][1] = self::strlen(substr($subject, 0, $a[1]));
                }
            }
        }

        return $return;
    }

    #alias for self::str_limit()
    public static function truncate($s, $maxlength = null, $continue = "\xe2\x80\xa6", &$is_cutted = null, $tail_min_length = 20)
    {
        return self::str_limit($s, $maxlength, $continue, $is_cutted, $tail_min_length);
    }

    /**
     * Truncates UTF-8 text to the given length; the last word is kept whole
     * instead of being cut in the middle.
     * HTML entities and "\r\n" line breaks are each counted as one character.
     *
     * @param   string|null     $s                Text in UTF-8 encoding
     * @param   int|null|digit  $maxlength        Text length limit; an empty value falls back to 256
     * @param   string          $continue         Trailing string appended when the text gets cut;
     *                                            NULL falls back to "\xe2\x80\xa6" (horizontal ellipsis)
     * @param   bool|null       &$is_cutted       Receives TRUE if the text was cut, FALSE otherwise
     * @param   int|digit       $tail_min_length  If the "tail" left after cutting is shorter than
     *                                            $tail_min_length characters, the text is returned unchanged
     * @return  string|bool|null                  Returns FALSE if error occurred
     */
    public static function str_limit($s, $maxlength = null, $continue = "\xe2\x80\xa6", &$is_cutted = null, $tail_min_length = 20) #"\xe2\x80\xa6" = "…"
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        $is_cutted = false;
        if ($continue === null) $continue = "\xe2\x80\xa6";
        if (! $maxlength) $maxlength = 256;

        #speed improve block: cheap checks that prove the text already fits
        #{{{
        if (strlen($s) <= $maxlength) return $s;
        $s2 = str_replace("\r\n", '?', $s);
        $s2 = preg_replace('/&(?> [a-zA-Z][a-zA-Z\d]+
                                | \#(?> \d{1,4}
                                      | x[\da-fA-F]{2,4}
                                    )
                              );  # html сущности (< > & ")
                            /sxSX', '?', $s2);
        if (strlen($s2) <= $maxlength || self::strlen($s2) <= $maxlength) return $s;
        #}}}

        #split the text into units: a line break, an html entity or a single character
        $r = preg_match_all('/(?> \r\n   # переносы строк
                                | &(?> [a-zA-Z][a-zA-Z\d]+
                                     | \#(?> \d{1,4}
                                           | x[\da-fA-F]{2,4}
                                         )
                                   );  # html сущности (< > & ")
                                | .
                              )
                             /sxuSX', $s, $m);
        if ($r === false) return false;

        if (count($m[0]) <= $maxlength) return $s;

        $left = implode('', array_slice($m[0], 0, $maxlength));
        #Trailing ASCII characters are trimmed, except letters, digits, the opening "(" and some others.
        #The ";" must not be trimmed at the end because it terminates entities like &xxx;
        $left2 = rtrim($left, "\x00..\x28\x2A..\x2F\x3A\x3C..\x3E\x40\x5B\x5C\x5E..\x60\x7B\x7C\x7E\x7F");
        if (strlen($left) !== strlen($left2)) $return = $left2 . $continue;
        else
        {
            #the cut fell inside a word: append the rest of that word
            $right = implode('', array_slice($m[0], $maxlength));
            #NOTE: in /u mode "\xhh" denotes the code point U+00hh, not a raw byte,
            #so the closing quotes are written as code points (\x{...})
            preg_match('/^(?> [\d\)\]\}\-\.:]+  #цифры, закрывающие парные символы, дефис для составных слов, дата, время, IP-адреса, URL типа www.ya.ru:80!
                            | \p{L}+            #буквы
                            | \x{201d}          #закрывающие кавычки
                            | \x{2019}          #закрывающие кавычки
                            | \x{201c}          #закрывающие кавычки
                            | \x{bb}            #закрывающие кавычки
                          )+
                        /suxSX', $right, $tail_m);
            $right = isset($tail_m[0]) ? rtrim($tail_m[0], '.-') : '';
            $return = $left . $right;
            if (strlen($return) !== strlen($s)) $return .= $continue;
        }
        if (self::strlen($s) - self::strlen($return) < $tail_min_length) return $s;

        $is_cutted = true;
        return $return;
    }

    /**
     * Implementation str_split() function for UTF-8 encoding string.
*
     * @param   string|null     $s
     * @param   int|null|digit  $length  Chunk length in characters; NULL means 1
     * @return  array|bool|null          NULL if $s is NULL;
     *                                   FALSE if $length < 1 or the string cannot be split
     */
    public static function str_split($s, $length = null)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        $length = ($length === null) ? 1 : intval($length);
        if ($length < 1) return false;

        #split into single characters first: regexp quantifiers {min,max} have limits!
        if (preg_match_all('~.~suSX', $s, $m) === false) return false;
        if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return false;
        if ($length === 1) return $m[0];

        #glue the characters back together in groups of $length
        $chunks = array();
        foreach (array_chunk($m[0], $length) as $chars) $chunks[] = implode('', $chars);
        return $chunks;
    }

    /**
     * Implementation of strlen() for UTF-8 encoded strings.
     *
     * @param   string|null    $s
     * @return  int|bool|null  Length in characters; NULL if $s is NULL;
     *                         FALSE if error occurred
     */
    public static function strlen($s)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        #since PHP-5.3.x mb_strlen() is faster than strlen(utf8_decode())
        #Fallback: utf8_decode() converts characters that are not in ISO-8859-1 to '?',
        #which is quite alright for the purpose of counting.
        return function_exists('mb_strlen') ? mb_strlen($s, 'utf-8')
                                            : strlen(utf8_decode($s));
    }

    /**
     * Implementation of strpos() for UTF-8 encoded strings.
     *
     * @param   string|null    $s       The entire string
     * @param   string|int     $needle  The searched substring
     * @param   int|null       $offset  Character position from which the search starts;
     *                                  NULL and negative values are treated as 0
     * @return  int|bool|null  Character position of the first occurrence of the needle;
     *                         FALSE if the needle is not found; NULL if $s is NULL
     */
    public static function strpos($s, $needle, $offset = null)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        if ($offset === null || $offset < 0) $offset = 0;
        if (function_exists('mb_strpos')) return mb_strpos($s, $needle, $offset, 'utf-8');

        #Fallback: search by bytes, starting at byte $offset, and translate every hit
        #into a character position until one at or after character $offset is found.
        $byte_pos = $offset;
        while (true)
        {
            $byte_pos = strpos($s, $needle, $byte_pos);
            if ($byte_pos === false) return false;
            $char_pos = self::strlen(substr($s, 0, $byte_pos));
            $byte_pos++; #continue behind this hit on the next round
            if (! ($char_pos < $offset)) return $char_pos;
        }
    }

    /**
     * Find position of first occurrence of a case-insensitive string.
*
     * @param   string|null    $s       The entire string
     * @param   string|int     $needle  The searched substring
     * @param   int|null       $offset  Character position from which the search starts;
     *                                  NULL and negative values are treated as 0
     * @return  int|bool|null  Character position of the first occurrence of the needle;
     *                         FALSE if the needle is not found; NULL if $s is NULL
     */
    public static function stripos($s, $needle, $offset = null)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        if ($offset === null || $offset < 0) $offset = 0;
        if (function_exists('mb_stripos')) return mb_stripos($s, $needle, $offset, 'utf-8');

        #optimization block (speed improve)
        #{{{
        $is_ascii_s      = (bool) self::is_ascii($s);
        $is_ascii_needle = (bool) self::is_ascii($needle);
        #an ASCII-only haystack cannot contain a non-ASCII needle; the opposite case
        #(ASCII needle inside a non-ASCII haystack) is perfectly possible and must be searched
        if ($is_ascii_s && ! $is_ascii_needle) return false;
        if ($is_ascii_s && $is_ascii_needle) return stripos($s, $needle, $offset);
        #}}}

        #compare the lower-cased forms
        $s = self::convert_case($s, CASE_LOWER, false);
        if ($s === false) return false;
        $needle = self::convert_case($needle, CASE_LOWER, false);
        if ($needle === false) return false;
        return self::strpos($s, $needle, $offset);
    }

    /**
     * Implementation of strrev() for UTF-8 encoded strings.
     *
     * @param   string|null       $s
     * @return  string|bool|null  Returns FALSE if error occurred
     */
    public static function strrev($s)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        $chars = self::str_split($s);
        if (! is_array($chars)) return false;
        return implode('', array_reverse($chars));
    }

    /**
     * Implementation substr() function for UTF-8 encoding string.
*
     * @link    http://www.w3.org/International/questions/qa-forms-utf-8.html
     * @param   string|null       $s
     * @param   int|digit         $offset  Start position, in characters
     * @param   int|null|digit    $length  Length in characters; NULL means "up to the end"
     * @return  string|bool|null           Returns FALSE if error occurred
     */
    public static function substr($s, $offset, $length = null)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        #since PHP-5.3.x mb_substr() is faster than iconv_substr()
        if (function_exists('mb_substr'))
        {
            if ($length === null) $length = self::strlen($s);
            return mb_substr($s, $offset, $length, 'utf-8');
        }
        if (function_exists('iconv_substr'))
        {
            if ($length === null) $length = self::strlen($s);
            return iconv_substr($s, $offset, $length, 'utf-8');
        }

        #Fallback: split into characters; the last split string is memoized,
        #so repeated calls on the same string do not split it again
        static $_s = null;
        static $_a = null;

        if ($_s !== $s) $_a = self::str_split($_s = $s);
        if (! is_array($_a)) return false;
        if ($length !== null) $a = array_slice($_a, $offset, $length);
        else                  $a = array_slice($_a, $offset);
        return implode('', $a);
    }

    /**
     * Implementation of substr_replace() for UTF-8 encoded strings.
     *
     * @param   string|null       $s
     * @param   string|int        $replacement
     * @param   int|digit         $start   Start position, in characters
     * @param   int|null          $length  Number of characters to replace;
     *                                     NULL means "up to the end of the string"
     * @return  string|bool|null           Returns FALSE if error occurred
     */
    public static function substr_replace($s, $replacement, $start, $length = null)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        $chars = self::str_split($s);
        if (! is_array($chars)) return false;
        #array_splice() accepts NULL as "to the end" only since PHP 8.0 (older versions
        #treat it as 0), so the default is resolved explicitly
        if ($length === null) $length = count($chars);
        array_splice($chars, $start, $length, $replacement);
        return implode('', $chars);
    }

    /**
     * Implementation of ucfirst() for UTF-8 encoded strings.
     * Converts the first character of a UTF-8 string to upper case.
     *
     * @param   string|null       $s
     * @param   bool              $is_other_to_lowercase  Convert the remaining characters to lower case?
     * @return  string|bool|null                          Returns FALSE if error occurred
     */
    public static function ucfirst($s, $is_other_to_lowercase = true)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        if ($s === '' || ! is_string($s)) return $s;
        #$m[1] is the first character, $m[2] is the rest; no match means broken UTF-8
        if (! preg_match('/^(.)(.*)$/suSX', $s, $m)) return false;
        return self::uppercase($m[1]) . ($is_other_to_lowercase ? self::lowercase($m[2]) : $m[2]);
    }

    /**
     * Implementation of ucwords() for UTF-8 encoded strings.
     * Converts the first character of every word of a UTF-8 string to upper case;
     * the remaining characters of every word are converted to lower case
     * when $is_other_to_lowercase is TRUE.
     *
     * @param   string|null       $s
     * @param   bool              $is_other_to_lowercase  Convert the remaining characters to lower case?
     * @param   string            $spaces_re              Pattern that captures the word separators
     * @return  string|bool|null                          Returns FALSE if error occurred
     */
    public static function ucwords($s, $is_other_to_lowercase = true, $spaces_re = '~([\pZ\s]+)~suSX') #\pXps is POSIX space: property Z or tab, NL, VT, FF, CR
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_null($s)) return $s;

        #separators are captured too, so implode() restores the original spacing
        $words = preg_split($spaces_re, $s, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if ($words === false) return false;
        foreach ($words as $k => $word)
        {
            #the caller's flag is passed through as is
            $words[$k] = self::ucfirst($word, $is_other_to_lowercase);
            if ($words[$k] === false) return false;
        }
        return implode('', $words);
    }

    /**
     * Decodes sequences in the format %uXXXX or %u{XXXXXX} into UTF-8 characters.
     *
     * Used for decoding data like "%u0442%u0435%u0441%u0442", encoded by the
     * obsolete javascript function escape().
     * Using the javascript function encodeURIComponent() is recommended instead.
     *
     * NOTICE
     * The obsolete format %uXXXX covers only the UCS-2 range, i.e. U+0 to U+FFFF.
     *
     * @param   scalar|array|null  $data             Arrays are processed recursively, keys included
     * @param   bool               $is_rawurlencode  Pass every decoded character through rawurlencode()?
     * @return  scalar|array|null                    Returns FALSE if error occurred
     *                                               (e.g. $data is an object or a resource)
     */
    public static function unescape($data, $is_rawurlencode = false)
    {
        if (! ReflectionTypeHint::isValid()) return false;
        if (is_array($data))
        {
            $d = array();
            foreach ($data as $k => $v)
            {
                $k = self::unescape($k, $is_rawurlencode);
                if ($k === false) return false;
                $d[$k] = self::unescape($v, $is_rawurlencode);
                #FALSE is an error marker unless the source value itself was a boolean
                if ($d[$k] === false && ! is_bool($v)) return false;
            }
            return $d;
        }
        if (is_string($data))
        {
            if (strpos($data, '%u') === false) return $data; #use strpos() for speed improving
            #the class name is passed into the closure the same way as in self::html_entity_decode(),
            #because "self" is not available inside closures before PHP 5.4
            $class = __CLASS__;
            return preg_replace_callback('/%u(  [\da-fA-F]{4}+          #%uXXXX     only UCS-2
                                             | \{ [\da-fA-F]{1,6}+ \}  #%u{XXXXXX} extended form for all UNICODE charts
                                             )
                                          /sxSX',
                                          function (array $m) use ($class, $is_rawurlencode)
                                          {
                                              $codepoint = hexdec(trim($m[1], '{}'));
                                              $char = $class::chr($codepoint);
                                              return $is_rawurlencode ? rawurlencode($char) : $char;
                                          },
                                          $data);
        }
        if (is_scalar($data) || is_null($data)) return $data; #~ null, integer, float, boolean
        return false; #object or resource
    }

    /**
     * 1) Corrects the global arrays $_GET, $_POST, $_COOKIE, $_REQUEST:
     *    decodes values in the format %uXXXX and %u{XXXXXX}, encoded,
     *    for example, through the outdated javascript function escape().
     *    Standard PHP5 cannot do it.
     * 2) If in the HTTP_COOKIE there are parameters with the same name,
     *    takes the last value, not the first, as in the QUERY_STRING.
     * 3) Creates an array of $_POST for non-standard Content-Type, for example, "Content-Type: application/octet-stream".
     *    Standard PHP5 creates an array for "Content-Type: application/x-www-form-urlencoded" and "Content-Type: multipart/form-data".
     *
     * Sessions, cookies and independent authorization on subdomains.
     *
     * EXAMPLE 1
     * A production site http://domain.com got subdomains.
     * For cross-domain authorization through the session mechanism the COOKIE host name was changed from "domain.com" to ".domain.com".
     * As a result the authorization does not work.
     * Clearing the COOKIE helps, but forcing it on thousands of user computers is problematic.
* The problem is the following: if HTTP_COOKIE contains parameters with the same name, the last value is taken,
     * not the first one, as with QUERY_STRING.
     * A more detailed description:
     * PHP handles the HTTP_COOKIE header incorrectly (?) when it contains parameters with the same name but different values.
     * Example of an HTTP header sent by a client: "Cookie: sid=chpgs2fiak-330mzqza; sid=cmz5tnp5zz-xlbbgqp"
     * In this case the server takes the first value, not the last one.
     * Although in the same situation in QUERY_STRING the last parameter is always taken.
     * Two parameters with the same name can appear in HTTP_COOKIE if the following HTTP headers are sent to the client:
     * "Set-Cookie: sid=chpgs2fiak-330mzqza; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=domain.com"  (domain.com only)
     * "Set-Cookie: sid=cmz6uqorzv-1bn35110; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=.domain.com" (domain.com and all its subdomains)
     * Solution: change the session name.
     *
     * EXAMPLE 2
     * There are production sites: http://domain.com (main), http://admin.domain.com (admin panel),
     * http://sub1.domain.com (subproject 1), http://sub2.domain.com (subproject 2).
     * There is also a development server http://dev.domain.com, which may have its own subdomains.
     * Independent cross-domain authorization is required for http://*.domain.com and http://*.dev.domain.com.
     * The authorization state is kept in a session whose name and value are written to the COOKIE.
     * Since the domains http://*.dev.domain.com overlap with the domains http://*.domain.com,
     * different session names have to be used for independent authorization.
     * Example of HTTP headers of the server response:
     * "Set-Cookie: sid=chpgs2fiak-330mzqza; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=.domain.com"         (.domain.com and all its subdomains)
     * "Set-Cookie: sid.dev=cmz6uqorzv-1bn35110; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=.dev.domain.com" (dev.domain.com and all its subdomains)
     *
     * @link    http://tools.ietf.org/html/rfc2965  RFC 2965 - HTTP State Management Mechanism
     * @return  void
     */
    public static function unescape_request()
    {
        #becomes TRUE as soon as at least one of the global arrays has been re-parsed
        $fixed = false;
        #ATTENTION! HTTP_RAW_POST_DATA is only accessible when Content-Type of POST request is NOT default "application/x-www-form-urlencoded"!
        #raw POST body: taken from $GLOBALS when already there, otherwise read from php://input; NULL for non-POST requests
        $HTTP_RAW_POST_DATA = isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] === 'POST' ? (isset($GLOBALS['HTTP_RAW_POST_DATA']) ? $GLOBALS['HTTP_RAW_POST_DATA'] : @file_get_contents('php://input')) : null;
        if (ini_get('always_populate_raw_post_data')) $GLOBALS['HTTP_RAW_POST_DATA'] = $HTTP_RAW_POST_DATA;
        #map: name of the global array => raw request string it is parsed from
        foreach (array( '_GET'    => isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : null,
                        '_POST'   => $HTTP_RAW_POST_DATA,
                        '_COOKIE' => isset($_SERVER['HTTP_COOKIE']) ? $_SERVER['HTTP_COOKIE'] : null,
                        ) as $k => $v)
        {
            if (! is_string($v)) continue;
            if ($k === '_COOKIE')
            {
                #turn the "; " cookie separators into "&" so that parse_str() can be used
                $v = preg_replace('/; *+/sSX', '&', $v);
                unset($_COOKIE); #parse HTTP_COOKIE ourselves, so that it is processed the same way as QUERY_STRING
            }
            if (strpos($v, '%u') !== false)
            {
                #decoded characters are rawurlencode()d again, because parse_str() urldecodes its input
                parse_str(self::unescape($v, $is_rawurlencode = true), $GLOBALS[$k]);
                $fixed = true;
                continue;
            }
            #nothing to decode: an already existing global array is left as is
            if (array_key_exists($k, $GLOBALS)) continue;
            parse_str($v, $GLOBALS[$k]);
            $fixed = true;
        }
        if ($fixed)
        {
            #rebuild $_REQUEST; with the array "+" operator the left operand wins on equal keys (cookie, then post, then get)
            $_REQUEST =
                (isset($_COOKIE) ? $_COOKIE : array()) +
                (isset($_POST) ? $_POST : array()) +
                (isset($_GET) ? $_GET : array());
        }
    }

    /**
     * Calculates the height of the edit text in