aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKlaus Weidenbach <Klaus.Weidenbach@gmx.net>2017-10-25 01:57:18 +0200
committerKlaus Weidenbach <Klaus.Weidenbach@gmx.net>2017-10-29 22:00:06 +0100
commit8e4c5db766ce23d05b8507991b04fece743147de (patch)
tree55c89f2c145f47245e7d32380c92256051d6a8f2
parentfe5f1e4d67d999ed3c6ef78dc4d49f5dd1a93056 (diff)
downloadvolse-hubzilla-8e4c5db766ce23d05b8507991b04fece743147de.tar.gz
volse-hubzilla-8e4c5db766ce23d05b8507991b04fece743147de.tar.bz2
volse-hubzilla-8e4c5db766ce23d05b8507991b04fece743147de.zip
:arrow_up: Update Text_LanguageDetect.
Update from v0.3.0 (2012) to v1.0.0 (2017), which should remove some warnings and improve PHP 7 support. This PEAR library is now managed through Composer instead of being bundled under library/langdet. Also fix a problem in FeedutilsTest (the file was misnamed FeedutilsText.php).
-rw-r--r--composer.json3
-rw-r--r--composer.lock70
-rw-r--r--include/help.php28
-rw-r--r--include/language.php16
-rw-r--r--library/langdet/data/unicode_blocks.dat1
-rw-r--r--library/langdet/docs/example_clui.php35
-rw-r--r--library/langdet/docs/example_web.php72
-rw-r--r--library/langdet/docs/iso.php21
-rw-r--r--library/langdet/tests/Text_LanguageDetectTest.php2056
-rw-r--r--library/langdet/tests/Text_LanguageDetect_ISO639Test.php72
-rw-r--r--tests/unit/includes/FeedutilsTest.php (renamed from tests/unit/includes/FeedutilsText.php)9
-rw-r--r--tests/unit/includes/LanguageTest.php5
-rw-r--r--vendor/composer/include_paths.php10
-rw-r--r--vendor/pear/text_languagedetect/README.rst157
-rw-r--r--vendor/pear/text_languagedetect/Text/LanguageDetect.php (renamed from library/langdet/Text/LanguageDetect.php)172
-rw-r--r--vendor/pear/text_languagedetect/Text/LanguageDetect/Exception.php (renamed from library/langdet/Text/LanguageDetect/Exception.php)24
-rw-r--r--vendor/pear/text_languagedetect/Text/LanguageDetect/ISO639.php (renamed from library/langdet/Text/LanguageDetect/ISO639.php)3
-rw-r--r--vendor/pear/text_languagedetect/Text/LanguageDetect/Parser.php (renamed from library/langdet/Text/LanguageDetect/Parser.php)189
-rw-r--r--vendor/pear/text_languagedetect/composer.json32
-rw-r--r--vendor/pear/text_languagedetect/data/build-unicode_blocks.php7
-rw-r--r--vendor/pear/text_languagedetect/data/lang.dat (renamed from library/langdet/data/lang.dat)0
-rw-r--r--vendor/pear/text_languagedetect/data/unicode_blocks.dat1
-rw-r--r--vendor/pear/text_languagedetect/data/unicode_blocks.php874
-rw-r--r--vendor/pear/text_languagedetect/package.xml246
24 files changed, 1614 insertions, 2489 deletions
diff --git a/composer.json b/composer.json
index e6bdba61c..2db9954fa 100644
--- a/composer.json
+++ b/composer.json
@@ -33,7 +33,8 @@
"bshaffer/oauth2-server-php": "^1.9",
"ezyang/htmlpurifier": "^4.9",
"simplepie/simplepie": "~1.5",
- "league/html-to-markdown": "^4.4"
+ "league/html-to-markdown": "^4.4",
+ "pear/text_languagedetect": "^1.0"
},
"require-dev" : {
"php" : ">=7.0",
diff --git a/composer.lock b/composer.lock
index f31a719c3..7c0a5e7ef 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
- "content-hash": "4a78983d966b7641fd532909d625c21a",
+ "content-hash": "50f781f18deef8573188a713376a711d",
"packages": [
{
"name": "bshaffer/oauth2-server-php",
@@ -225,6 +225,50 @@
"time": "2016-10-29T18:58:20+00:00"
},
{
+ "name": "pear/text_languagedetect",
+ "version": "v1.0.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/pear/Text_LanguageDetect.git",
+ "reference": "bb9ff6f4970f686fac59081e916b456021fe7ba6"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/bb9ff6f4970f686fac59081e916b456021fe7ba6",
+ "reference": "bb9ff6f4970f686fac59081e916b456021fe7ba6",
+ "shasum": ""
+ },
+ "require-dev": {
+ "phpunit/phpunit": "*"
+ },
+ "suggest": {
+ "ext-mbstring": "May require the mbstring PHP extension"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-0": {
+ "Text": "./"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "include-path": [
+ "./"
+ ],
+ "license": [
+ "BSD-2-Clause"
+ ],
+ "authors": [
+ {
+ "name": "Nicholas Pisarro",
+ "email": "taak@php.net",
+ "role": "Lead"
+ }
+ ],
+ "description": "Identify human languages from text samples",
+ "homepage": "http://pear.php.net/package/Text_LanguageDetect",
+ "time": "2017-03-02T16:14:08+00:00"
+ },
+ {
"name": "psr/log",
"version": "1.0.2",
"source": {
@@ -276,12 +320,12 @@
"version": "3.2.2",
"source": {
"type": "git",
- "url": "https://github.com/fruux/sabre-dav.git",
+ "url": "https://github.com/sabre-io/dav.git",
"reference": "e987775e619728f12205606c9cc3ee565ffb1516"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fruux/sabre-dav/zipball/e987775e619728f12205606c9cc3ee565ffb1516",
+ "url": "https://api.github.com/repos/sabre-io/dav/zipball/e987775e619728f12205606c9cc3ee565ffb1516",
"reference": "e987775e619728f12205606c9cc3ee565ffb1516",
"shasum": ""
},
@@ -359,12 +403,12 @@
"version": "3.0.0",
"source": {
"type": "git",
- "url": "https://github.com/fruux/sabre-event.git",
+ "url": "https://github.com/sabre-io/event.git",
"reference": "831d586f5a442dceacdcf5e9c4c36a4db99a3534"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fruux/sabre-event/zipball/831d586f5a442dceacdcf5e9c4c36a4db99a3534",
+ "url": "https://api.github.com/repos/sabre-io/event/zipball/831d586f5a442dceacdcf5e9c4c36a4db99a3534",
"reference": "831d586f5a442dceacdcf5e9c4c36a4db99a3534",
"shasum": ""
},
@@ -416,12 +460,12 @@
"version": "4.2.3",
"source": {
"type": "git",
- "url": "https://github.com/fruux/sabre-http.git",
+ "url": "https://github.com/sabre-io/http.git",
"reference": "0295f9a3ee39be97e0898592fc19e42421e0cd93"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fruux/sabre-http/zipball/0295f9a3ee39be97e0898592fc19e42421e0cd93",
+ "url": "https://api.github.com/repos/sabre-io/http/zipball/0295f9a3ee39be97e0898592fc19e42421e0cd93",
"reference": "0295f9a3ee39be97e0898592fc19e42421e0cd93",
"shasum": ""
},
@@ -472,12 +516,12 @@
"version": "1.2.1",
"source": {
"type": "git",
- "url": "https://github.com/fruux/sabre-uri.git",
+ "url": "https://github.com/sabre-io/uri.git",
"reference": "ada354d83579565949d80b2e15593c2371225e61"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fruux/sabre-uri/zipball/ada354d83579565949d80b2e15593c2371225e61",
+ "url": "https://api.github.com/repos/sabre-io/uri/zipball/ada354d83579565949d80b2e15593c2371225e61",
"reference": "ada354d83579565949d80b2e15593c2371225e61",
"shasum": ""
},
@@ -523,12 +567,12 @@
"version": "4.1.2",
"source": {
"type": "git",
- "url": "https://github.com/fruux/sabre-vobject.git",
+ "url": "https://github.com/sabre-io/vobject.git",
"reference": "d0fde2fafa2a3dad1f559c2d1c2591d4fd75ae3c"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fruux/sabre-vobject/zipball/d0fde2fafa2a3dad1f559c2d1c2591d4fd75ae3c",
+ "url": "https://api.github.com/repos/sabre-io/vobject/zipball/d0fde2fafa2a3dad1f559c2d1c2591d4fd75ae3c",
"reference": "d0fde2fafa2a3dad1f559c2d1c2591d4fd75ae3c",
"shasum": ""
},
@@ -620,12 +664,12 @@
"version": "1.5.0",
"source": {
"type": "git",
- "url": "https://github.com/fruux/sabre-xml.git",
+ "url": "https://github.com/sabre-io/xml.git",
"reference": "59b20e5bbace9912607481634f97d05a776ffca7"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fruux/sabre-xml/zipball/59b20e5bbace9912607481634f97d05a776ffca7",
+ "url": "https://api.github.com/repos/sabre-io/xml/zipball/59b20e5bbace9912607481634f97d05a776ffca7",
"reference": "59b20e5bbace9912607481634f97d05a776ffca7",
"shasum": ""
},
diff --git a/include/help.php b/include/help.php
index 02c3cb8e4..0dc37e517 100644
--- a/include/help.php
+++ b/include/help.php
@@ -28,7 +28,7 @@ function get_help_content($tocpath = false) {
}
if($path) {
-
+
$title = basename($path);
if(! $tocpath)
\App::$page['title'] = t('Help:') . ' ' . ucwords(str_replace('-',' ',notags($title)));
@@ -38,10 +38,10 @@ function get_help_content($tocpath = false) {
// available and so default back to the English TOC at /doc/toc.{html,bb,md}
// TODO: This is incompatible with the hierarchical TOC construction
// defined in /Zotlabs/Widget/Helpindex.php.
- if($tocpath !== false &&
- load_doc_file('doc/' . $path . '.md') === '' &&
- load_doc_file('doc/' . $path . '.bb') === '' &&
- load_doc_file('doc/' . $path . '.html') === ''
+ if($tocpath !== false &&
+ load_doc_file('doc/' . $path . '.md') === '' &&
+ load_doc_file('doc/' . $path . '.bb') === '' &&
+ load_doc_file('doc/' . $path . '.html') === ''
) {
$path = $title;
}
@@ -120,22 +120,28 @@ function preg_callback_help_include($matches) {
}
+/**
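+ * @brief Determine which language the help content should be served in.
+ *
+ * @return array with keys 'language' (language code) and 'from_url' (boolean, true if the language was taken from the URL)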
+ */
function determine_help_language() {
- require_once('Text/LanguageDetect.php');
$lang_detect = new Text_LanguageDetect();
// Set this mode to recognize language by the short code like "en", "ru", etc.
$lang_detect->setNameMode(2);
- // If the language was specified in the URL, override the language preference
+ // If the language was specified in the URL, override the language preference
// of the browser. Default to English if both of these are absent.
if($lang_detect->languageExists(argv(1))) {
$lang = argv(1);
$from_url = true;
} else {
$lang = \App::$language;
- if(! isset($lang))
+ if(! isset($lang))
$lang = 'en';
+
$from_url = false;
}
+
return array('language' => $lang, 'from_url' => $from_url);
}
@@ -145,14 +151,14 @@ function load_doc_file($s) {
$x = determine_help_language();
$lang = $x['language'];
$url_idx = ($x['from_url'] ? 1 : 0);
- // The English translation is at the root of /doc/. Other languages are in
+ // The English translation is at the root of /doc/. Other languages are in
// subfolders named by the language code such as "de", "es", etc.
if($lang !== 'en') {
- $path .= '/' . $lang;
+ $path .= '/' . $lang;
}
$b = basename($s);
-
+
for($i=1+$url_idx; $i<argc()-1; $i++) {
$path .= '/' . argv($i);
}
diff --git a/include/language.php b/include/language.php
index a59823757..eb286231a 100644
--- a/include/language.php
+++ b/include/language.php
@@ -17,7 +17,7 @@
*
* Get the language setting directly from system variables, bypassing get_config()
* as database may not yet be configured.
- *
+ *
* If possible, we use the value from the browser.
*
* @return array with ordered list of preferred languages from browser
@@ -28,7 +28,7 @@ function get_browser_language() {
if (x($_SERVER, 'HTTP_ACCEPT_LANGUAGE')) {
// break up string into pieces (languages and q factors)
- preg_match_all('/([a-z]{1,8}(-[a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i',
+ preg_match_all('/([a-z]{1,8}(-[a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i',
$_SERVER['HTTP_ACCEPT_LANGUAGE'], $lang_parse);
if (count($lang_parse[1])) {
@@ -40,7 +40,7 @@ function get_browser_language() {
if ($val === '') $langs[$lang] = 1;
}
- // sort list based on value
+ // sort list based on value
arsort($langs, SORT_NUMERIC);
}
}
@@ -52,7 +52,7 @@ function get_browser_language() {
* @brief Returns the best language for which also a translation exists.
*
* This function takes the results from get_browser_language() and compares it
- * with the available translations and returns the best fitting language for
+ * with the available translations and returns the best fitting language for
* which there exists a translation.
*
* If there is no match fall back to config['system']['language']
@@ -243,11 +243,9 @@ function string_plural_select_default($n) {
*
* @see http://pear.php.net/package/Text_LanguageDetect
* @param string $s A string to examine
- * @return Language code in 2-letter ISO 639-1 (en, de, fr) format
+ * @return string Language code in 2-letter ISO 639-1 (en, de, fr) format
*/
function detect_language($s) {
- require_once('Text/LanguageDetect.php');
-
$min_length = get_config('system', 'language_detect_min_length');
if ($min_length === false)
$min_length = LANGUAGE_DETECT_MIN_LENGTH;
@@ -257,7 +255,7 @@ function detect_language($s) {
$min_confidence = LANGUAGE_DETECT_MIN_CONFIDENCE;
// embedded apps have long base64 strings which will trip up the detector.
- $naked_body = preg_replace('/\[app\](.*?)\[\/app\]/','',$s);
+ $naked_body = preg_replace('/\[app\](.*?)\[\/app\]/', '', $s);
// strip off bbcode
$naked_body = preg_replace('/\[(.+?)\]/', '', $naked_body);
if (mb_strlen($naked_body) < intval($min_length)) {
@@ -381,7 +379,7 @@ function lang_selector() {
$o = replace_macros($tpl, array(
'$title' => t('Select an alternate language'),
'$langs' => array($lang_options, $selected),
-
+
));
return $o;
diff --git a/library/langdet/data/unicode_blocks.dat b/library/langdet/data/unicode_blocks.dat
deleted file mode 100644
index 3b24cd2c1..000000000
--- a/library/langdet/data/unicode_blocks.dat
+++ /dev/null
@@ -1 +0,0 @@
-a:145:{i:0;a:3:{i:0;s:6:"0x0000";i:1;s:6:"0x007F";i:2;s:11:"Basic Latin";}i:1;a:3:{i:0;s:6:"0x0080";i:1;s:6:"0x00FF";i:2;s:18:"Latin-1 Supplement";}i:2;a:3:{i:0;s:6:"0x0100";i:1;s:6:"0x017F";i:2;s:16:"Latin Extended-A";}i:3;a:3:{i:0;s:6:"0x0180";i:1;s:6:"0x024F";i:2;s:16:"Latin Extended-B";}i:4;a:3:{i:0;s:6:"0x0250";i:1;s:6:"0x02AF";i:2;s:14:"IPA Extensions";}i:5;a:3:{i:0;s:6:"0x02B0";i:1;s:6:"0x02FF";i:2;s:24:"Spacing Modifier Letters";}i:6;a:3:{i:0;s:6:"0x0300";i:1;s:6:"0x036F";i:2;s:27:"Combining Diacritical Marks";}i:7;a:3:{i:0;s:6:"0x0370";i:1;s:6:"0x03FF";i:2;s:16:"Greek and Coptic";}i:8;a:3:{i:0;s:6:"0x0400";i:1;s:6:"0x04FF";i:2;s:8:"Cyrillic";}i:9;a:3:{i:0;s:6:"0x0500";i:1;s:6:"0x052F";i:2;s:19:"Cyrillic Supplement";}i:10;a:3:{i:0;s:6:"0x0530";i:1;s:6:"0x058F";i:2;s:8:"Armenian";}i:11;a:3:{i:0;s:6:"0x0590";i:1;s:6:"0x05FF";i:2;s:6:"Hebrew";}i:12;a:3:{i:0;s:6:"0x0600";i:1;s:6:"0x06FF";i:2;s:6:"Arabic";}i:13;a:3:{i:0;s:6:"0x0700";i:1;s:6:"0x074F";i:2;s:6:"Syriac";}i:14;a:3:{i:0;s:6:"0x0750";i:1;s:6:"0x077F";i:2;s:17:"Arabic 
Supplement";}i:15;a:3:{i:0;s:6:"0x0780";i:1;s:6:"0x07BF";i:2;s:6:"Thaana";}i:16;a:3:{i:0;s:6:"0x0900";i:1;s:6:"0x097F";i:2;s:10:"Devanagari";}i:17;a:3:{i:0;s:6:"0x0980";i:1;s:6:"0x09FF";i:2;s:7:"Bengali";}i:18;a:3:{i:0;s:6:"0x0A00";i:1;s:6:"0x0A7F";i:2;s:8:"Gurmukhi";}i:19;a:3:{i:0;s:6:"0x0A80";i:1;s:6:"0x0AFF";i:2;s:8:"Gujarati";}i:20;a:3:{i:0;s:6:"0x0B00";i:1;s:6:"0x0B7F";i:2;s:5:"Oriya";}i:21;a:3:{i:0;s:6:"0x0B80";i:1;s:6:"0x0BFF";i:2;s:5:"Tamil";}i:22;a:3:{i:0;s:6:"0x0C00";i:1;s:6:"0x0C7F";i:2;s:6:"Telugu";}i:23;a:3:{i:0;s:6:"0x0C80";i:1;s:6:"0x0CFF";i:2;s:7:"Kannada";}i:24;a:3:{i:0;s:6:"0x0D00";i:1;s:6:"0x0D7F";i:2;s:9:"Malayalam";}i:25;a:3:{i:0;s:6:"0x0D80";i:1;s:6:"0x0DFF";i:2;s:7:"Sinhala";}i:26;a:3:{i:0;s:6:"0x0E00";i:1;s:6:"0x0E7F";i:2;s:4:"Thai";}i:27;a:3:{i:0;s:6:"0x0E80";i:1;s:6:"0x0EFF";i:2;s:3:"Lao";}i:28;a:3:{i:0;s:6:"0x0F00";i:1;s:6:"0x0FFF";i:2;s:7:"Tibetan";}i:29;a:3:{i:0;s:6:"0x1000";i:1;s:6:"0x109F";i:2;s:7:"Myanmar";}i:30;a:3:{i:0;s:6:"0x10A0";i:1;s:6:"0x10FF";i:2;s:8:"Georgian";}i:31;a:3:{i:0;s:6:"0x1100";i:1;s:6:"0x11FF";i:2;s:11:"Hangul Jamo";}i:32;a:3:{i:0;s:6:"0x1200";i:1;s:6:"0x137F";i:2;s:8:"Ethiopic";}i:33;a:3:{i:0;s:6:"0x1380";i:1;s:6:"0x139F";i:2;s:19:"Ethiopic Supplement";}i:34;a:3:{i:0;s:6:"0x13A0";i:1;s:6:"0x13FF";i:2;s:8:"Cherokee";}i:35;a:3:{i:0;s:6:"0x1400";i:1;s:6:"0x167F";i:2;s:37:"Unified Canadian Aboriginal Syllabics";}i:36;a:3:{i:0;s:6:"0x1680";i:1;s:6:"0x169F";i:2;s:5:"Ogham";}i:37;a:3:{i:0;s:6:"0x16A0";i:1;s:6:"0x16FF";i:2;s:5:"Runic";}i:38;a:3:{i:0;s:6:"0x1700";i:1;s:6:"0x171F";i:2;s:7:"Tagalog";}i:39;a:3:{i:0;s:6:"0x1720";i:1;s:6:"0x173F";i:2;s:7:"Hanunoo";}i:40;a:3:{i:0;s:6:"0x1740";i:1;s:6:"0x175F";i:2;s:5:"Buhid";}i:41;a:3:{i:0;s:6:"0x1760";i:1;s:6:"0x177F";i:2;s:8:"Tagbanwa";}i:42;a:3:{i:0;s:6:"0x1780";i:1;s:6:"0x17FF";i:2;s:5:"Khmer";}i:43;a:3:{i:0;s:6:"0x1800";i:1;s:6:"0x18AF";i:2;s:9:"Mongolian";}i:44;a:3:{i:0;s:6:"0x1900";i:1;s:6:"0x194F";i:2;s:5:"Limbu";}i:45;a:3:{i:0;s:6:"0x1950";i:1;s:6:"0x197F";i:2;s:6:"Tai 
Le";}i:46;a:3:{i:0;s:6:"0x1980";i:1;s:6:"0x19DF";i:2;s:11:"New Tai Lue";}i:47;a:3:{i:0;s:6:"0x19E0";i:1;s:6:"0x19FF";i:2;s:13:"Khmer Symbols";}i:48;a:3:{i:0;s:6:"0x1A00";i:1;s:6:"0x1A1F";i:2;s:8:"Buginese";}i:49;a:3:{i:0;s:6:"0x1D00";i:1;s:6:"0x1D7F";i:2;s:19:"Phonetic Extensions";}i:50;a:3:{i:0;s:6:"0x1D80";i:1;s:6:"0x1DBF";i:2;s:30:"Phonetic Extensions Supplement";}i:51;a:3:{i:0;s:6:"0x1DC0";i:1;s:6:"0x1DFF";i:2;s:38:"Combining Diacritical Marks Supplement";}i:52;a:3:{i:0;s:6:"0x1E00";i:1;s:6:"0x1EFF";i:2;s:25:"Latin Extended Additional";}i:53;a:3:{i:0;s:6:"0x1F00";i:1;s:6:"0x1FFF";i:2;s:14:"Greek Extended";}i:54;a:3:{i:0;s:6:"0x2000";i:1;s:6:"0x206F";i:2;s:19:"General Punctuation";}i:55;a:3:{i:0;s:6:"0x2070";i:1;s:6:"0x209F";i:2;s:27:"Superscripts and Subscripts";}i:56;a:3:{i:0;s:6:"0x20A0";i:1;s:6:"0x20CF";i:2;s:16:"Currency Symbols";}i:57;a:3:{i:0;s:6:"0x20D0";i:1;s:6:"0x20FF";i:2;s:39:"Combining Diacritical Marks for Symbols";}i:58;a:3:{i:0;s:6:"0x2100";i:1;s:6:"0x214F";i:2;s:18:"Letterlike Symbols";}i:59;a:3:{i:0;s:6:"0x2150";i:1;s:6:"0x218F";i:2;s:12:"Number Forms";}i:60;a:3:{i:0;s:6:"0x2190";i:1;s:6:"0x21FF";i:2;s:6:"Arrows";}i:61;a:3:{i:0;s:6:"0x2200";i:1;s:6:"0x22FF";i:2;s:22:"Mathematical Operators";}i:62;a:3:{i:0;s:6:"0x2300";i:1;s:6:"0x23FF";i:2;s:23:"Miscellaneous Technical";}i:63;a:3:{i:0;s:6:"0x2400";i:1;s:6:"0x243F";i:2;s:16:"Control Pictures";}i:64;a:3:{i:0;s:6:"0x2440";i:1;s:6:"0x245F";i:2;s:29:"Optical Character Recognition";}i:65;a:3:{i:0;s:6:"0x2460";i:1;s:6:"0x24FF";i:2;s:22:"Enclosed Alphanumerics";}i:66;a:3:{i:0;s:6:"0x2500";i:1;s:6:"0x257F";i:2;s:11:"Box Drawing";}i:67;a:3:{i:0;s:6:"0x2580";i:1;s:6:"0x259F";i:2;s:14:"Block Elements";}i:68;a:3:{i:0;s:6:"0x25A0";i:1;s:6:"0x25FF";i:2;s:16:"Geometric Shapes";}i:69;a:3:{i:0;s:6:"0x2600";i:1;s:6:"0x26FF";i:2;s:21:"Miscellaneous Symbols";}i:70;a:3:{i:0;s:6:"0x2700";i:1;s:6:"0x27BF";i:2;s:8:"Dingbats";}i:71;a:3:{i:0;s:6:"0x27C0";i:1;s:6:"0x27EF";i:2;s:36:"Miscellaneous Mathematical 
Symbols-A";}i:72;a:3:{i:0;s:6:"0x27F0";i:1;s:6:"0x27FF";i:2;s:21:"Supplemental Arrows-A";}i:73;a:3:{i:0;s:6:"0x2800";i:1;s:6:"0x28FF";i:2;s:16:"Braille Patterns";}i:74;a:3:{i:0;s:6:"0x2900";i:1;s:6:"0x297F";i:2;s:21:"Supplemental Arrows-B";}i:75;a:3:{i:0;s:6:"0x2980";i:1;s:6:"0x29FF";i:2;s:36:"Miscellaneous Mathematical Symbols-B";}i:76;a:3:{i:0;s:6:"0x2A00";i:1;s:6:"0x2AFF";i:2;s:35:"Supplemental Mathematical Operators";}i:77;a:3:{i:0;s:6:"0x2B00";i:1;s:6:"0x2BFF";i:2;s:32:"Miscellaneous Symbols and Arrows";}i:78;a:3:{i:0;s:6:"0x2C00";i:1;s:6:"0x2C5F";i:2;s:10:"Glagolitic";}i:79;a:3:{i:0;s:6:"0x2C80";i:1;s:6:"0x2CFF";i:2;s:6:"Coptic";}i:80;a:3:{i:0;s:6:"0x2D00";i:1;s:6:"0x2D2F";i:2;s:19:"Georgian Supplement";}i:81;a:3:{i:0;s:6:"0x2D30";i:1;s:6:"0x2D7F";i:2;s:8:"Tifinagh";}i:82;a:3:{i:0;s:6:"0x2D80";i:1;s:6:"0x2DDF";i:2;s:17:"Ethiopic Extended";}i:83;a:3:{i:0;s:6:"0x2E00";i:1;s:6:"0x2E7F";i:2;s:24:"Supplemental Punctuation";}i:84;a:3:{i:0;s:6:"0x2E80";i:1;s:6:"0x2EFF";i:2;s:23:"CJK Radicals Supplement";}i:85;a:3:{i:0;s:6:"0x2F00";i:1;s:6:"0x2FDF";i:2;s:15:"Kangxi Radicals";}i:86;a:3:{i:0;s:6:"0x2FF0";i:1;s:6:"0x2FFF";i:2;s:34:"Ideographic Description Characters";}i:87;a:3:{i:0;s:6:"0x3000";i:1;s:6:"0x303F";i:2;s:27:"CJK Symbols and Punctuation";}i:88;a:3:{i:0;s:6:"0x3040";i:1;s:6:"0x309F";i:2;s:8:"Hiragana";}i:89;a:3:{i:0;s:6:"0x30A0";i:1;s:6:"0x30FF";i:2;s:8:"Katakana";}i:90;a:3:{i:0;s:6:"0x3100";i:1;s:6:"0x312F";i:2;s:8:"Bopomofo";}i:91;a:3:{i:0;s:6:"0x3130";i:1;s:6:"0x318F";i:2;s:25:"Hangul Compatibility Jamo";}i:92;a:3:{i:0;s:6:"0x3190";i:1;s:6:"0x319F";i:2;s:6:"Kanbun";}i:93;a:3:{i:0;s:6:"0x31A0";i:1;s:6:"0x31BF";i:2;s:17:"Bopomofo Extended";}i:94;a:3:{i:0;s:6:"0x31C0";i:1;s:6:"0x31EF";i:2;s:11:"CJK Strokes";}i:95;a:3:{i:0;s:6:"0x31F0";i:1;s:6:"0x31FF";i:2;s:28:"Katakana Phonetic Extensions";}i:96;a:3:{i:0;s:6:"0x3200";i:1;s:6:"0x32FF";i:2;s:31:"Enclosed CJK Letters and Months";}i:97;a:3:{i:0;s:6:"0x3300";i:1;s:6:"0x33FF";i:2;s:17:"CJK 
Compatibility";}i:98;a:3:{i:0;s:6:"0x3400";i:1;s:6:"0x4DBF";i:2;s:34:"CJK Unified Ideographs Extension A";}i:99;a:3:{i:0;s:6:"0x4DC0";i:1;s:6:"0x4DFF";i:2;s:23:"Yijing Hexagram Symbols";}i:100;a:3:{i:0;s:6:"0x4E00";i:1;s:6:"0x9FFF";i:2;s:22:"CJK Unified Ideographs";}i:101;a:3:{i:0;s:6:"0xA000";i:1;s:6:"0xA48F";i:2;s:12:"Yi Syllables";}i:102;a:3:{i:0;s:6:"0xA490";i:1;s:6:"0xA4CF";i:2;s:11:"Yi Radicals";}i:103;a:3:{i:0;s:6:"0xA700";i:1;s:6:"0xA71F";i:2;s:21:"Modifier Tone Letters";}i:104;a:3:{i:0;s:6:"0xA800";i:1;s:6:"0xA82F";i:2;s:12:"Syloti Nagri";}i:105;a:3:{i:0;s:6:"0xAC00";i:1;s:6:"0xD7AF";i:2;s:16:"Hangul Syllables";}i:106;a:3:{i:0;s:6:"0xD800";i:1;s:6:"0xDB7F";i:2;s:15:"High Surrogates";}i:107;a:3:{i:0;s:6:"0xDB80";i:1;s:6:"0xDBFF";i:2;s:27:"High Private Use Surrogates";}i:108;a:3:{i:0;s:6:"0xDC00";i:1;s:6:"0xDFFF";i:2;s:14:"Low Surrogates";}i:109;a:3:{i:0;s:6:"0xE000";i:1;s:6:"0xF8FF";i:2;s:16:"Private Use Area";}i:110;a:3:{i:0;s:6:"0xF900";i:1;s:6:"0xFAFF";i:2;s:28:"CJK Compatibility Ideographs";}i:111;a:3:{i:0;s:6:"0xFB00";i:1;s:6:"0xFB4F";i:2;s:29:"Alphabetic Presentation Forms";}i:112;a:3:{i:0;s:6:"0xFB50";i:1;s:6:"0xFDFF";i:2;s:27:"Arabic Presentation Forms-A";}i:113;a:3:{i:0;s:6:"0xFE00";i:1;s:6:"0xFE0F";i:2;s:19:"Variation Selectors";}i:114;a:3:{i:0;s:6:"0xFE10";i:1;s:6:"0xFE1F";i:2;s:14:"Vertical Forms";}i:115;a:3:{i:0;s:6:"0xFE20";i:1;s:6:"0xFE2F";i:2;s:20:"Combining Half Marks";}i:116;a:3:{i:0;s:6:"0xFE30";i:1;s:6:"0xFE4F";i:2;s:23:"CJK Compatibility Forms";}i:117;a:3:{i:0;s:6:"0xFE50";i:1;s:6:"0xFE6F";i:2;s:19:"Small Form Variants";}i:118;a:3:{i:0;s:6:"0xFE70";i:1;s:6:"0xFEFF";i:2;s:27:"Arabic Presentation Forms-B";}i:119;a:3:{i:0;s:6:"0xFF00";i:1;s:6:"0xFFEF";i:2;s:29:"Halfwidth and Fullwidth Forms";}i:120;a:3:{i:0;s:6:"0xFFF0";i:1;s:6:"0xFFFF";i:2;s:8:"Specials";}i:121;a:3:{i:0;s:7:"0x10000";i:1;s:7:"0x1007F";i:2;s:18:"Linear B Syllabary";}i:122;a:3:{i:0;s:7:"0x10080";i:1;s:7:"0x100FF";i:2;s:18:"Linear B 
Ideograms";}i:123;a:3:{i:0;s:7:"0x10100";i:1;s:7:"0x1013F";i:2;s:14:"Aegean Numbers";}i:124;a:3:{i:0;s:7:"0x10140";i:1;s:7:"0x1018F";i:2;s:21:"Ancient Greek Numbers";}i:125;a:3:{i:0;s:7:"0x10300";i:1;s:7:"0x1032F";i:2;s:10:"Old Italic";}i:126;a:3:{i:0;s:7:"0x10330";i:1;s:7:"0x1034F";i:2;s:6:"Gothic";}i:127;a:3:{i:0;s:7:"0x10380";i:1;s:7:"0x1039F";i:2;s:8:"Ugaritic";}i:128;a:3:{i:0;s:7:"0x103A0";i:1;s:7:"0x103DF";i:2;s:11:"Old Persian";}i:129;a:3:{i:0;s:7:"0x10400";i:1;s:7:"0x1044F";i:2;s:7:"Deseret";}i:130;a:3:{i:0;s:7:"0x10450";i:1;s:7:"0x1047F";i:2;s:7:"Shavian";}i:131;a:3:{i:0;s:7:"0x10480";i:1;s:7:"0x104AF";i:2;s:7:"Osmanya";}i:132;a:3:{i:0;s:7:"0x10800";i:1;s:7:"0x1083F";i:2;s:17:"Cypriot Syllabary";}i:133;a:3:{i:0;s:7:"0x10A00";i:1;s:7:"0x10A5F";i:2;s:10:"Kharoshthi";}i:134;a:3:{i:0;s:7:"0x1D000";i:1;s:7:"0x1D0FF";i:2;s:25:"Byzantine Musical Symbols";}i:135;a:3:{i:0;s:7:"0x1D100";i:1;s:7:"0x1D1FF";i:2;s:15:"Musical Symbols";}i:136;a:3:{i:0;s:7:"0x1D200";i:1;s:7:"0x1D24F";i:2;s:30:"Ancient Greek Musical Notation";}i:137;a:3:{i:0;s:7:"0x1D300";i:1;s:7:"0x1D35F";i:2;s:21:"Tai Xuan Jing Symbols";}i:138;a:3:{i:0;s:7:"0x1D400";i:1;s:7:"0x1D7FF";i:2;s:33:"Mathematical Alphanumeric Symbols";}i:139;a:3:{i:0;s:7:"0x20000";i:1;s:7:"0x2A6DF";i:2;s:34:"CJK Unified Ideographs Extension B";}i:140;a:3:{i:0;s:7:"0x2F800";i:1;s:7:"0x2FA1F";i:2;s:39:"CJK Compatibility Ideographs Supplement";}i:141;a:3:{i:0;s:7:"0xE0000";i:1;s:7:"0xE007F";i:2;s:4:"Tags";}i:142;a:3:{i:0;s:7:"0xE0100";i:1;s:7:"0xE01EF";i:2;s:30:"Variation Selectors Supplement";}i:143;a:3:{i:0;s:7:"0xF0000";i:1;s:7:"0xFFFFF";i:2;s:32:"Supplementary Private Use Area-A";}i:144;a:3:{i:0;s:8:"0x100000";i:1;s:8:"0x10FFFF";i:2;s:32:"Supplementary Private Use Area-B";}} \ No newline at end of file
diff --git a/library/langdet/docs/example_clui.php b/library/langdet/docs/example_clui.php
deleted file mode 100644
index 8e7d8577d..000000000
--- a/library/langdet/docs/example_clui.php
+++ /dev/null
@@ -1,35 +0,0 @@
-<?php
-
-/**
- * example usage (CLI)
- *
- * @package Text_LanguageDetect
- * @version CVS: $Id: example_clui.php 322305 2012-01-15 00:04:17Z clockwerx $
- */
-
-require_once 'Text/LanguageDetect.php';
-
-$l = new Text_LanguageDetect;
-
-$stdin = fopen('php://stdin', 'r');
-
-echo "Supported languages:\n";
-$langs = $l->getLanguages();
-sort($langs);
-echo join(', ', $langs);
-
-echo "\ntotal ", count($langs), "\n\n";
-
-while ($line = fgets($stdin)) {
- $result = $l->detect($line, 4);
- print_r($result);
- $blocks = $l->detectUnicodeBlocks($line, true);
- print_r($blocks);
-}
-
-fclose($stdin);
-unset($l);
-
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
-
-?>
diff --git a/library/langdet/docs/example_web.php b/library/langdet/docs/example_web.php
deleted file mode 100644
index 1e155fef2..000000000
--- a/library/langdet/docs/example_web.php
+++ /dev/null
@@ -1,72 +0,0 @@
-<?php
-
-/**
- * example usage (web)
- *
- * @package Text_LanguageDetect
- * @version CVS: $Id: example_web.php 205493 2006-01-18 00:26:57Z taak $
- */
-
-// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded
-header('Content-type: text/html; charset=utf-8', true);
-
-require_once 'Text/LanguageDetect.php';
-
-$l = new Text_LanguageDetect;
-if (isset($_REQUEST['q'])) {
- $q = stripslashes($_REQUEST['q']);
-}
-
-?>
-<html>
-<head>
-<title>Text_LanguageDetect demonstration</title>
-</head>
-<body>
-<h2>Text_LanguageDetect</h2>
-<?
-echo "<small>Supported languages:\n";
-$langs = $l->getLanguages();
-sort($langs);
-foreach ($langs as $lang) {
- echo ucfirst($lang), ', ';
- $i++;
-}
-
-echo "<br />total $i</small><br /><br />";
-
-?>
-<form method="post">
-Enter text to identify language (at least a couple of sentences):<br />
-<textarea name="q" wrap="virtual" cols="80" rows="8"><?= $q ?></textarea>
-<br />
-<input type="submit" value="Submit" />
-</form>
-<?
-if (isset($q) && strlen($q)) {
- $len = $l->utf8strlen($q);
- if ($len < 20) { // this value picked somewhat arbitrarily
- echo "Warning: string not very long ($len chars)<br />\n";
- }
-
- $result = $l->detectConfidence($q);
-
- if ($result == null) {
- echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";
- } else {
- echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";
- }
-
- $result = $l->detectUnicodeBlocks($q, false);
- if (!empty($result)) {
- arsort($result);
- echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />";
- }
-}
-
-unset($l);
-
-/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
-
-?>
-</body></html>
diff --git a/library/langdet/docs/iso.php b/library/langdet/docs/iso.php
deleted file mode 100644
index 6d7ec1d2e..000000000
--- a/library/langdet/docs/iso.php
+++ /dev/null
@@ -1,21 +0,0 @@
-<?php
-/**
- * Demonstrates how to use ISO language codes.
- *
- * The "name mode" changes the way languages are accepted and returned.
- */
-require_once 'Text/LanguageDetect.php';
-$l = new Text_LanguageDetect();
-
-
-//will output the ISO 639-1 two-letter language code
-// "de"
-$l->setNameMode(2);
-echo $l->detectSimple('Das ist ein kleiner Text') . "\n";
-
-//will output the ISO 639-2 three-letter language code
-// "deu"
-$l->setNameMode(3);
-echo $l->detectSimple('Das ist ein kleiner Text') . "\n";
-
-?> \ No newline at end of file
diff --git a/library/langdet/tests/Text_LanguageDetectTest.php b/library/langdet/tests/Text_LanguageDetectTest.php
deleted file mode 100644
index bbf4dd779..000000000
--- a/library/langdet/tests/Text_LanguageDetectTest.php
+++ /dev/null
@@ -1,2056 +0,0 @@
-<?php
-
-/**
- * @package Text_LanguageDetect
- * @version CVS: $Id: Text_LanguageDetectTest.php 322353 2012-01-16 08:41:43Z cweiske $
- */
-set_include_path(
- __DIR__ . '/../' . PATH_SEPARATOR . get_include_path()
-);
-error_reporting(E_ALL|E_STRICT);
-
-require_once 'Text/LanguageDetect.php';
-require_once 'PHPUnit/Framework/TestCase.php';
-
-class Text_LanguageDetectTest extends PHPUnit_Framework_TestCase {
-
- function setup ()
- {
- ini_set('magic_quotes_runtime', 0);
- $this->x = new Text_LanguageDetect();
- }
-
- function tearDown ()
- {
- unset($this->x);
- }
-
- function test_get_data_locAbsolute()
- {
- $this->assertEquals(
- '/path/to/file',
- $this->x->_get_data_loc('/path/to/file')
- );
- }
-
- function test_get_data_locPearPath()
- {
- $this->x->_data_dir = '/path/to/pear/data';
- $this->assertEquals(
- '/path/to/pear/data/Text_LanguageDetect/file',
- $this->x->_get_data_loc('file')
- );
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Language database does not exist:
- */
- function test_readdbNonexistingFile()
- {
- $this->x->_readdb('thisfiledoesnotexist');
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Language database is not readable:
- */
- function test_readdbUnreadableFile()
- {
- $name = tempnam(sys_get_temp_dir(), 'unittest-Text_LanguageDetect-');
- chmod($name, 0000);
- $this->x->_readdb($name);
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Language database has no elements.
- */
- function test_checkTrigramEmpty()
- {
- $this->x->_checkTrigram(array());
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Language database is not an array
- */
- function test_checkTrigramNoArray()
- {
- $this->x->_checkTrigram('foo');
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Error loading database. Try turning magic_quotes_runtime off
- */
- function test_checkTrigramNoArrayMagicQuotes()
- {
- if (version_compare(PHP_VERSION, '5.4.0-dev') >= 0) {
- $this->markTestSkipped('5.4.0 has no magic quotes anymore');
- }
- ini_set('magic_quotes_runtime', 1);
- $this->x->_checkTrigram('foo');
- }
-
- function test_splitter ()
- {
- $str = 'hello';
-
- $result = $this->x->_trigram($str);
-
- $this->assertEquals(array(' he' => 1, 'hel' => 1, 'ell' => 1, 'llo' => 1, 'lo ' => 1), $result);
-
- $str = 'aa aa whatever';
-
- $result = $this->x->_trigram($str);
- $this->assertEquals(2, $result[' aa']);
- $this->assertEquals(2, $result['aa ']);
- $this->assertEquals(1, $result['a a']);
-
- $str = 'aa aa';
- $result = $this->x->_trigram($str);
- $this->assertArrayNotHasKey(' a', $result, ' a');
- $this->assertArrayNotHasKey('a ', $result, 'a ');
- }
-
- function test_splitter2 ()
- {
- $str = 'resumé';
-
- $result = $this->x->_trigram($str);
-
- $this->assertTrue(isset($result['mé ']), 'mé ');
- $this->assertTrue(isset($result['umé']), 'umé');
- $this->assertTrue(!isset($result['é ']), 'é');
-
- // tests lower-casing accented characters
- $str = 'resumÉ';
-
- $result = $this->x->_trigram($str);
-
- $this->assertTrue(isset($result['mé ']),'mé ');
- $this->assertTrue(isset($result['umé']),'umé');
- $this->assertTrue(!isset($result['é ']),'é');
- }
-
- function test_sort ()
- {
- $arr = array('a' => 1, 'b' => 2, 'c' => 2);
- $this->x->_bub_sort($arr);
-
- $final_arr = array('b' => 2, 'c' => 2, 'a' => 1);
-
- $this->assertEquals($final_arr, $arr);
- }
-
- function test_error ()
- {
- // this test passes the object a series of bad strings to see how it handles them
-
- $result = $this->x->detectSimple("");
-
- $this->assertTrue(!$result);
-
- $result = $this->x->detectSimple("\n");
-
- $this->assertTrue(!$result);
-
- // should fail on extremely short strings
- $result = $this->x->detectSimple("a");
-
- $this->assertTrue(!$result);
-
- $result = $this->x->detectSimple("aa");
-
- $this->assertTrue(!$result);
-
- $result = $this->x->detectSimple('xxxxxxxxxxxxxxxxxxx');
-
- $this->assertEquals(null, $result);
- }
-
- function testOmitLanguages()
- {
- $str = 'This function may return Boolean FALSE, but may also return a non-Boolean value which evaluates to FALSE, such as 0 or "". Please read the section on Booleans for more information. Use the === operator for testing the return value of this function.';
-
- $myobj = new Text_LanguageDetect;
-
- $myobj->_use_unicode_narrowing = false;
-
- $count = $myobj->getLanguageCount();
- $returnval = $myobj->omitLanguages('english');
- $newcount = $myobj->getLanguageCount();
-
- $this->assertEquals(1, $returnval);
- $this->assertEquals(1, $count - $newcount);
-
- $result = strtolower($myobj->detectSimple($str));
-
- $this->assertTrue($result != 'english', $result);
-
- $myobj = new Text_LanguageDetect;
-
- $count = $myobj->getLanguageCount();
- $returnval = $myobj->omitLanguages(array('danish', 'italian'), true);
- $newcount = $myobj->getLanguageCount();
-
- $this->assertEquals($count - $newcount, $returnval);
- $this->assertEquals($count - $returnval, $newcount);
-
- $result = strtolower($myobj->detectSimple($str));
-
- $this->assertTrue($result == 'danish' || $result == 'italian', $result);
-
- $result = $myobj->detect($str);
-
- $this->assertEquals(2, count($result));
- $this->assertTrue(isset($result['danish']));
- $this->assertTrue(isset($result['italian']));
-
- unset($myobj);
- }
-
- function testOmitLanguagesNameMode2()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(1, $this->x->omitLanguages('en'));
- }
-
- function testOmitLanguagesIncludeString()
- {
- $this->assertGreaterThan(1, $this->x->omitLanguages('english', true));
- $langs = $this->x->getLanguages();
- $this->assertEquals(1, count($langs));
- $this->assertContains('english', $langs);
- }
-
- function testOmitLanguagesClearsClusterCache()
- {
- $this->x->omitLanguages(array('english', 'german'), true);
- $this->assertNull($this->x->_clusters);
- $this->x->clusterLanguages();
- $this->assertNotNull($this->x->_clusters);
- $this->x->omitLanguages('german');
- $this->assertNull($this->x->_clusters, 'cluster cache be empty now');
- }
-
- function test_perl_compatibility()
- {
- // if this test fails, then many of the others will
-
- $myobj = new Text_LanguageDetect;
- $myobj->setPerlCompatible(true);
-
- $testtext = "hello";
-
- $result = $myobj->_trigram($testtext);
-
- $this->assertTrue(!isset($result[' he']));
- }
-
- function test_french_db ()
- {
-
- $safe_model = array(
- "es " => 0, " de" => 1, "de " => 2, " le" => 3, "ent" => 4,
- "le " => 5, "nt " => 6, "la " => 7, "s d" => 8, " la" => 9,
- "ion" => 10, "on " => 11, "re " => 12, " pa" => 13, "e l" => 14,
- "e d" => 15, " l'" => 16, "e p" => 17, " co" => 18, " pr" => 19,
- "tio" => 20, "ns " => 21, " en" => 22, "ne " => 23, "que" => 24,
- "r l" => 25, "les" => 26, "ur " => 27, "en " => 28, "ati" => 29,
- "ue " => 30, " po" => 31, " d'" => 32, "par" => 33, " a " => 34,
- "et " => 35, "it " => 36, " qu" => 37, "men" => 38, "ons" => 39,
- "te " => 40, " et" => 41, "t d" => 42, " re" => 43, "des" => 44,
- " un" => 45, "ie " => 46, "s l" => 47, " su" => 48, "pou" => 49,
- " au" => 50, " à " => 51, "con" => 52, "er " => 53, " no" => 54,
- "ait" => 55, "e c" => 56, "se " => 57, "té " => 58, "du " => 59,
- " du" => 60, " dé" => 61, "ce " => 62, "e e" => 63, "is " => 64,
- "n d" => 65, "s a" => 66, " so" => 67, "e r" => 68, "e s" => 69,
- "our" => 70, "res" => 71, "ssi" => 72, "eur" => 73, " se" => 74,
- "eme" => 75, "est" => 76, "us " => 77, "sur" => 78, "ant" => 79,
- "iqu" => 80, "s p" => 81, "une" => 82, "uss" => 83, "l'a" => 84,
- "pro" => 85, "ter" => 86, "tre" => 87, "end" => 88, "rs " => 89,
- " ce" => 90, "e a" => 91, "t p" => 92, "un " => 93, " ma" => 94,
- " ru" => 95, " ré" => 96, "ous" => 97, "ris" => 98, "rus" => 99,
- "sse" => 100, "ans" => 101, "ar " => 102, "com" => 103, "e m" => 104,
- "ire" => 105, "nce" => 106, "nte" => 107, "t l" => 108, " av" => 109,
- " mo" => 110, " te" => 111, "il " => 112, "me " => 113, "ont" => 114,
- "ten" => 115, "a p" => 116, "dan" => 117, "pas" => 118, "qui" => 119,
- "s e" => 120, "s s" => 121, " in" => 122, "ist" => 123, "lle" => 124,
- "nou" => 125, "pré" => 126, "'un" => 127, "air" => 128, "d'a" => 129,
- "ir " => 130, "n e" => 131, "rop" => 132, "ts " => 133, " da" => 134,
- "a s" => 135, "as " => 136, "au " => 137, "den" => 138, "mai" => 139,
- "mis" => 140, "ori" => 141, "out" => 142, "rme" => 143, "sio" => 144,
- "tte" => 145, "ux " => 146, "a d" => 147, "ien" => 148, "n a" => 149,
- "ntr" => 150, "omm" => 151, "ort" => 152, "ouv" => 153, "s c" => 154,
- "son" => 155, "tes" => 156, "ver" => 157, "ère" => 158, " il" => 159,
- " m " => 160, " sa" => 161, " ve" => 162, "a r" => 163, "ais" => 164,
- "ava" => 165, "di " => 166, "n p" => 167, "sti" => 168, "ven" => 169,
- " mi" => 170, "ain" => 171, "enc" => 172, "for" => 173, "ité" => 174,
- "lar" => 175, "oir" => 176, "rem" => 177, "ren" => 178, "rro" => 179,
- "rés" => 180, "sie" => 181, "t a" => 182, "tur" => 183, " pe" => 184,
- " to" => 185, "d'u" => 186, "ell" => 187, "err" => 188, "ers" => 189,
- "ide" => 190, "ine" => 191, "iss" => 192, "mes" => 193, "por" => 194,
- "ran" => 195, "sit" => 196, "st " => 197, "t r" => 198, "uti" => 199,
- "vai" => 200, "é l" => 201, "ési" => 202, " di" => 203, " n'" => 204,
- " ét" => 205, "a c" => 206, "ass" => 207, "e t" => 208, "in " => 209,
- "nde" => 210, "pre" => 211, "rat" => 212, "s m" => 213, "ste" => 214,
- "tai" => 215, "tch" => 216, "ui " => 217, "uro" => 218, "ès " => 219,
- " es" => 220, " fo" => 221, " tr" => 222, "'ad" => 223, "app" => 224,
- "aux" => 225, "e à" => 226, "ett" => 227, "iti" => 228, "lit" => 229,
- "nal" => 230, "opé" => 231, "r d" => 232, "ra " => 233, "rai" => 234,
- "ror" => 235, "s r" => 236, "tat" => 237, "uté" => 238, "à l" => 239,
- " af" => 240, "anc" => 241, "ara" => 242, "art" => 243, "bre" => 244,
- "ché" => 245, "dre" => 246, "e f" => 247, "ens" => 248, "lem" => 249,
- "n r" => 250, "n t" => 251, "ndr" => 252, "nne" => 253, "onn" => 254,
- "pos" => 255, "s t" => 256, "tiq" => 257, "ure" => 258, " tu" => 259,
- "ale" => 260, "and" => 261, "ave" => 262, "cla" => 263, "cou" => 264,
- "e n" => 265, "emb" => 266, "ins" => 267, "jou" => 268, "mme" => 269,
- "rie" => 270, "rès" => 271, "sem" => 272, "str" => 273, "t i" => 274,
- "ues" => 275, "uni" => 276, "uve" => 277, "é d" => 278, "ée " => 279,
- " ch" => 280, " do" => 281, " eu" => 282, " fa" => 283, " lo" => 284,
- " ne" => 285, " ra" => 286, "arl" => 287, "att" => 288, "ec " => 289,
- "ica" => 290, "l a" => 291, "l'o" => 292, "l'é" => 293, "mmi" => 294,
- "nta" => 295, "orm" => 296, "ou " => 297, "r u" => 298, "rle" => 299
- );
-
-
- $my_arr = $this->x->_lang_db['french'];
-
- foreach ($safe_model as $key => $value) {
- $this->assertTrue(isset($my_arr[$key]),$key);
- if (isset($my_arr[$key])) {
- $this->assertEquals($value, $my_arr[$key], $key);
- }
- }
- }
-
- function test_english_db ()
- {
-
- $realdb = array(
- " th" => 0, "the" => 1, "he " => 2, "ed " => 3, " to" => 4,
- " in" => 5, "er " => 6, "ing" => 7, "ng " => 8, " an" => 9,
- "nd " => 10, " of" => 11, "and" => 12, "to " => 13, "of " => 14,
- " co" => 15, "at " => 16, "on " => 17, "in " => 18, " a " => 19,
- "d t" => 20, " he" => 21, "e t" => 22, "ion" => 23, "es " => 24,
- " re" => 25, "re " => 26, "hat" => 27, " sa" => 28, " st" => 29,
- " ha" => 30, "her" => 31, "tha" => 32, "tio" => 33, "or " => 34,
- " ''" => 35, "en " => 36, " wh" => 37, "e s" => 38, "ent" => 39,
- "n t" => 40, "s a" => 41, "as " => 42, "for" => 43, "is " => 44,
- "t t" => 45, " be" => 46, "ld " => 47, "e a" => 48, "rs " => 49,
- " wa" => 50, "ut " => 51, "ve " => 52, "ll " => 53, "al " => 54,
- " ma" => 55, "e i" => 56, " fo" => 57, "'s " => 58, "an " => 59,
- "est" => 60, " hi" => 61, " mo" => 62, " se" => 63, " pr" => 64,
- "s t" => 65, "ate" => 66, "st " => 67, "ter" => 68, "ere" => 69,
- "ted" => 70, "nt " => 71, "ver" => 72, "d a" => 73, " wi" => 74,
- "se " => 75, "e c" => 76, "ect" => 77, "ns " => 78, " on" => 79,
- "ly " => 80, "tol" => 81, "ey " => 82, "r t" => 83, " ca" => 84,
- "ati" => 85, "ts " => 86, "all" => 87, " no" => 88, "his" => 89,
- "s o" => 90, "ers" => 91, "con" => 92, "e o" => 93, "ear" => 94,
- "f t" => 95, "e w" => 96, "was" => 97, "ons" => 98, "sta" => 99,
- "'' " => 100, "sti" => 101, "n a" => 102, "sto" => 103, "t h" => 104,
- " we" => 105, "id " => 106, "th " => 107, " it" => 108, "ce " => 109,
- " di" => 110, "ave" => 111, "d h" => 112, "cou" => 113, "pro" => 114,
- "ad " => 115, "oll" => 116, "ry " => 117, "d s" => 118, "e m" => 119,
- " so" => 120, "ill" => 121, "cti" => 122, "te " => 123, "tor" => 124,
- "eve" => 125, "g t" => 126, "it " => 127, " ch" => 128, " de" => 129,
- "hav" => 130, "oul" => 131, "ty " => 132, "uld" => 133, "use" => 134,
- " al" => 135, "are" => 136, "ch " => 137, "me " => 138, "out" => 139,
- "ove" => 140, "wit" => 141, "ys " => 142, "chi" => 143, "t a" => 144,
- "ith" => 145, "oth" => 146, " ab" => 147, " te" => 148, " wo" => 149,
- "s s" => 150, "res" => 151, "t w" => 152, "tin" => 153, "e b" => 154,
- "e h" => 155, "nce" => 156, "t s" => 157, "y t" => 158, "e p" => 159,
- "ele" => 160, "hin" => 161, "s i" => 162, "nte" => 163, " li" => 164,
- "le " => 165, " do" => 166, "aid" => 167, "hey" => 168, "ne " => 169,
- "s w" => 170, " as" => 171, " fr" => 172, " tr" => 173, "end" => 174,
- "sai" => 175, " el" => 176, " ne" => 177, " su" => 178, "'t " => 179,
- "ay " => 180, "hou" => 181, "ive" => 182, "lec" => 183, "n't" => 184,
- " ye" => 185, "but" => 186, "d o" => 187, "o t" => 188, "y o" => 189,
- " ho" => 190, " me" => 191, "be " => 192, "cal" => 193, "e e" => 194,
- "had" => 195, "ple" => 196, " at" => 197, " bu" => 198, " la" => 199,
- "d b" => 200, "s h" => 201, "say" => 202, "t i" => 203, " ar" => 204,
- "e f" => 205, "ght" => 206, "hil" => 207, "igh" => 208, "int" => 209,
- "not" => 210, "ren" => 211, " is" => 212, " pa" => 213, " sh" => 214,
- "ays" => 215, "com" => 216, "n s" => 217, "r a" => 218, "rin" => 219,
- "y a" => 220, " un" => 221, "n c" => 222, "om " => 223, "thi" => 224,
- " mi" => 225, "by " => 226, "d i" => 227, "e d" => 228, "e n" => 229,
- "t o" => 230, " by" => 231, "e r" => 232, "eri" => 233, "old" => 234,
- "ome" => 235, "whe" => 236, "yea" => 237, " gr" => 238, "ar " => 239,
- "ity" => 240, "mpl" => 241, "oun" => 242, "one" => 243, "ow " => 244,
- "r s" => 245, "s f" => 246, "tat" => 247, " ba" => 248, " vo" => 249,
- "bou" => 250, "sam" => 251, "tim" => 252, "vot" => 253, "abo" => 254,
- "ant" => 255, "ds " => 256, "ial" => 257, "ine" => 258, "man" => 259,
- "men" => 260, " or" => 261, " po" => 262, "amp" => 263, "can" => 264,
- "der" => 265, "e l" => 266, "les" => 267, "ny " => 268, "ot " => 269,
- "rec" => 270, "tes" => 271, "tho" => 272, "ica" => 273, "ild" => 274,
- "ir " => 275, "nde" => 276, "ose" => 277, "ous" => 278, "pre" => 279,
- "ste" => 280, "era" => 281, "per" => 282, "r o" => 283, "red" => 284,
- "rie" => 285, " bo" => 286, " le" => 287, "ali" => 288, "ars" => 289,
- "ore" => 290, "ric" => 291, "s m" => 292, "str" => 293, " fa" => 294,
- "ess" => 295, "ie " => 296, "ist" => 297, "lat" => 298, "uri" => 299,
- );
-
- $mod = $this->x->_lang_db['english'];
-
- foreach ($realdb as $key => $value) {
- $this->assertTrue(isset($mod[$key]), $key);
- if (isset($mod[$key])) {
- $this->assertEquals($value, $mod[$key], $key);
- }
- }
-
- foreach ($mod as $key => $value) {
- $this->assertTrue(isset($realdb[$key]));
- if (isset($realdb[$key])) {
- $this->assertEquals($value, $realdb[$key], $key);
- }
- }
- }
-
- function test_confidence ()
- {
- $str = 'The next thing to notice is the Content-length header. The Content-length header notifies the server of the size of the data that you intend to send. This prevents unexpected end-of-data errors from the server when dealing with binary data, because the server will read the specified number of bytes from the data stream regardless of any spurious end-of-data characters.';
-
- $result = $this->x->detectConfidence($str);
-
- $this->assertEquals(3, count($result));
- $this->assertTrue(isset($result['language']), 'language');
- $this->assertTrue(isset($result['similarity']), 'similarity');
- $this->assertTrue(isset($result['confidence']), 'confidence');
- $this->assertEquals('english', $result['language']);
- $this->assertTrue($result['similarity'] <= 300 && $result['similarity'] >= 0, $result['similarity']);
- $this->assertTrue($result['confidence'] <= 1 && $result['confidence'] >= 0, $result['confidence']);
-
- // todo: tests for Danish and Norwegian should have lower confidence
- }
-
- function test_long_example ()
- {
- // an example that is more than 300 trigrams long
- $str = 'The Italian Renaissance began the opening phase of the Renaissance, a period of great cultural change and achievement from the 14th to the 16th century. The word renaissance means "rebirth," and the era is best known for the renewed interest in the culture of classical antiquity. The Italian Renaissance began in northern Italy, centering in Florence. It then spread south, having an especially significant impact on Rome, which was largely rebuilt by the Renaissance popes. The Italian Renaissance is best known for its cultural achievements. This includes works of literature by such figures as Petrarch, Castiglione, and Machiavelli; artists such as Michaelangelo and Leonardo da Vinci, and great works of architecture such as The Duomo in Florence and St. Peter\'s Basilica in Rome. At the same time, present-day historians also see the era as one of economic regression and of little progress in science. Furthermore, some historians argue that the lot of the peasants and urban poor, the majority of the population, worsened during this period.';
-
- $this->x->setPerlCompatible();
- $tri = $this->x->_trigram($str);
-
- $exp_tri = array(
- ' th',
- 'the',
- 'he ',
- ' an',
- ' re',
- ' of',
- 'ce ',
- 'nce',
- 'of ',
- 'ren',
- ' in',
- 'and',
- 'nd ',
- 'an ',
- 'san',
- ' it',
- 'ais',
- 'anc',
- 'ena',
- 'in ',
- 'iss',
- 'nai',
- 'ssa',
- 'tur',
- ' pe',
- 'as ',
- 'ch ',
- 'ent',
- 'ian',
- 'me ',
- 'n r',
- 'res',
- ' as',
- ' be',
- ' wo',
- 'at ',
- 'chi',
- 'e i',
- 'e o',
- 'e p',
- 'gre',
- 'his',
- 'ing',
- 'is ',
- 'ita',
- 'n f',
- 'ng ',
- 're ',
- 's a',
- 'st ',
- 'tal',
- 'ter',
- 'th ',
- 'ts ',
- 'ure',
- 'wor',
- ' ar',
- ' cu',
- ' po',
- ' su',
- 'ach',
- 'al ',
- 'ali',
- 'ans',
- 'ant',
- 'cul',
- 'e b',
- 'e r',
- 'e t',
- 'enc',
- 'era',
- 'eri',
- 'es ',
- 'est',
- 'f t',
- 'ica',
- 'ion',
- 'ist',
- 'lia',
- 'ltu',
- 'ly ',
- 'ns ',
- 'nt ',
- 'ome',
- 'on ',
- 'or ',
- 'ore',
- 'ori',
- 'rea',
- 'rom',
- 'rth',
- 's b',
- 's o',
- 'suc',
- 't t',
- 'uch',
- 'ult',
- ' ac',
- ' by',
- ' ce',
- ' da',
- ' du',
- ' er',
- ' fl',
- ' fo',
- ' gr',
- ' hi',
- ' is',
- ' kn',
- ' li',
- ' ma',
- ' on',
- ' pr',
- ' ro',
- ' so',
- 'a i',
- 'ang',
- 'arc',
- 'arg',
- 'beg',
- 'bes',
- 'by ',
- 'cen',
- 'cha',
- 'd o',
- 'd s',
- 'e a',
- 'e e',
- 'e m',
- 'e s',
- 'eat',
- 'ed ',
- 'ega',
- 'eme',
- 'ene',
- 'ess',
- 'eve',
- 'f l',
- 'flo',
- 'for',
- 'gan',
- 'gel',
- 'h a',
- 'her',
- 'hie',
- 'ich',
- 'iev',
- 'inc',
- 'iod',
- 'ite',
- 'ity',
- 'kno',
- 'ks ',
- 'l a',
- 'lit',
- 'lor',
- 'men',
- 'mic',
- 'n i',
- 'n s',
- 'n t',
- 'ne ',
- 'nge',
- 'now',
- 'nte',
- 'nts',
- 'od ',
- 'one',
- 'ope',
- 'ork',
- 'own',
- 'per',
- 'pet',
- 'pop',
- 'pre',
- 'ra ',
- 'ral',
- 'rch',
- 'reb',
- 'ria',
- 'rin',
- 'rio',
- 'rks',
- 's i',
- 's p',
- 'sen',
- 'ssi',
- 'sto',
- 't i',
- 't k',
- 't o',
- 'thi',
- 'tor',
- 'ty ',
- 'ura',
- 'vem',
- 'vin',
- 'wn ',
- 'y s',
- ' a ',
- ' al',
- ' at',
- ' ba',
- ' ca',
- ' ch',
- ' cl',
- ' ec',
- ' es',
- ' fi',
- ' fr',
- ' fu',
- ' ha',
- ' im',
- ' la',
- ' le',
- ' lo',
- ' me',
- ' mi',
- ' no',
- ' op',
- ' ph',
- ' sa',
- ' sc',
- ' se',
- ' si',
- ' sp',
- ' st',
- ' ti',
- ' to',
- ' ur',
- ' vi',
- ' wa',
- ' wh',
- '\'s ',
- 'a a',
- 'a p',
- 'a v',
- 'act',
- 'ad ',
- 'ael',
- 'ajo',
- 'all',
- 'als',
- 'aly',
- 'ame',
- 'ard',
- 'art',
- 'asa',
- 'ase',
- 'asi',
- 'ass',
- 'ast',
- 'ati',
- 'atu',
- 'ave',
- 'avi',
- 'ay ',
- 'ban',
- 'bas',
- 'bir',
- 'bui',
- 'c r',
- 'ca ',
- 'cal',
- 'can',
- 'cas',
- 'ci ',
- 'cia',
- 'cie',
- 'cla',
- 'clu',
- 'con',
- 'ct ',
- 'ctu',
- 'd a',
- 'd d',
- 'd g',
- 'd i',
- 'd l',
- 'd m',
- 'd r',
- 'd t',
- 'd u',
- 'da ',
- 'day',
- 'des',
- 'do ',
- 'duo',
- 'dur',
- 'e c',
- 'e d',
- 'e h',
- 'e l',
- 'e w',
- 'ead',
- 'ean',
- 'eas',
- 'ebi',
- 'ebu',
- 'eci',
- 'eco',
- 'ect',
- 'ee ',
- 'egr',
- 'ela',
- 'ell',
- 'elo',
- 'ely',
- 'en ',
- 'eni',
- 'eon',
- 'er\'',
- 'ere',
- 'erm',
- 'ern',
- 'ese',
- 'esp',
- 'ete',
- 'etr',
- 'ewe',
- 'f a',
- 'f c',
- 'f e',
- 'f g',
- 'fic',
- 'fig',
- 'fro',
- 'fur',
- 'g a',
- 'g i',
- 'g p',
- 'g t',
- 'ge ',
- 'gli',
- 'gni',
- 'gue',
- 'gur',
- 'h c',
- 'h f',
- 'h t',
- 'h w',
- 'hae',
- 'han',
- 'has',
- 'hat',
- 'hav',
- 'hen',
- 'hia',
- 'hic',
- 'hit',
- 'ial',
- 'iav',
- 'ic ',
- 'ien',
- 'ifi',
- 'igl',
- 'ign',
- 'igu',
- 'ili',
- 'ilt',
- 'ime',
- 'imp',
- 'int',
- 'iqu',
- 'irt',
- 'it ',
- 'its',
- 'itt',
- 'jor',
- 'l c',
- 'lan',
- 'lar',
- 'las',
- 'lat',
- 'le ',
- 'leo',
- 'li ',
- 'lic',
- 'lio',
- 'lli',
- 'lly',
- 'lo ',
- 'lot',
- 'lso',
- 'lt ',
- 'lud',
- 'm t',
- 'mac',
- 'maj',
- 'mea',
- 'mo ',
- 'mor',
- 'mpa',
- 'n a',
- 'n e',
- 'n n',
- 'n p',
- 'nar',
- 'nci',
- 'ncl',
- 'ned',
- 'new',
- 'nif',
- 'nin',
- 'nom',
- 'nor',
- 'nti',
- 'ntu',
- 'o a',
- 'o d',
- 'o i',
- 'o s',
- 'o t',
- 'ogr',
- 'om ',
- 'omi',
- 'omo',
- 'ona',
- 'ono',
- 'oor',
- 'opu',
- 'ord',
- 'ors',
- 'ort',
- 'ot ',
- 'out',
- 'pac',
- 'pea',
- 'pec',
- 'pen',
- 'pes',
- 'pha',
- 'poo',
- 'pro',
- 'pul',
- 'qui',
- 'r i',
- 'r t',
- 'r\'s',
- 'rar',
- 'rat',
- 'rba',
- 'rd ',
- 'rdo',
- 'reg',
- 'rge',
- 'rgu',
- 'rit',
- 'rmo',
- 'rn ',
- 'rog',
- 'rse',
- 'rti',
- 'ry ',
- 's c',
- 's l',
- 's m',
- 's s',
- 's t',
- 's w',
- 'sam',
- 'sci',
- 'se ',
- 'see',
- 'sic',
- 'sig',
- 'sil',
- 'sio',
- 'so ',
- 'som',
- 'sou',
- 'spe',
- 'spr',
- 'ss ',
- 'sti',
- 'sts',
- 't b',
- 't c',
- 't d',
- 't f',
- 't w',
- 'tec',
- 'tha',
- 'tig',
- 'tim',
- 'tio',
- 'tiq',
- 'tis',
- 'tle',
- 'to ',
- 'tra',
- 'ttl',
- 'ude',
- 'ue ',
- 'uil',
- 'uit',
- 'ula',
- 'uom',
- 'urb',
- 'uri',
- 'urt',
- 'ury',
- 'uth',
- 'vel',
- 'was',
- 'wed',
- 'whi',
- 'y h',
- 'y o',
- 'y r',
- 'y t'
- );
-
- $differences = array_diff(array_keys($tri), $exp_tri);
- $this->assertEquals(0, count($differences));
- $this->assertEquals(0, count(array_diff($exp_tri, array_keys($tri))));
- $this->assertEquals(count($exp_tri), count($tri));
- //print_r(array_diff($exp_tri, array_keys($tri)));
- //print_r(array_diff(array_keys($tri), $exp_tri));
-
- // tests the bubble sort mechanism
- $this->x->_bub_sort($tri);
- $this->assertEquals($exp_tri, array_keys($tri));
-
- $true_differences = array(
- "cas" => array('change' => 300, 'baserank' => 265, 'refrank' => null), "s i" => array('change' => 21, 'baserank' => 183, 'refrank' => 162),
- "e b" => array('change' => 88, 'baserank' => 66, 'refrank' => 154), "ent" => array('change' => 12, 'baserank' => 27, 'refrank' => 39),
- "ome" => array('change' => 152, 'baserank' => 83, 'refrank' => 235), "ral" => array('change' => 300, 'baserank' => 176, 'refrank' => null),
- "ita" => array('change' => 300, 'baserank' => 44, 'refrank' => null), "bas" => array('change' => 300, 'baserank' => 258, 'refrank' => null),
- " ar" => array('change' => 148, 'baserank' => 56, 'refrank' => 204), " in" => array('change' => 5, 'baserank' => 10, 'refrank' => 5),
- " ti" => array('change' => 300, 'baserank' => 227, 'refrank' => null), "ty " => array('change' => 61, 'baserank' => 193, 'refrank' => 132),
- "tur" => array('change' => 300, 'baserank' => 23, 'refrank' => null), "iss" => array('change' => 300, 'baserank' => 20, 'refrank' => null),
- "ria" => array('change' => 300, 'baserank' => 179, 'refrank' => null), " me" => array('change' => 25, 'baserank' => 216, 'refrank' => 191),
- "t k" => array('change' => 300, 'baserank' => 189, 'refrank' => null), " es" => array('change' => 300, 'baserank' => 207, 'refrank' => null),
- "ren" => array('change' => 202, 'baserank' => 9, 'refrank' => 211), "in " => array('change' => 1, 'baserank' => 19, 'refrank' => 18),
- "ly " => array('change' => 0, 'baserank' => 80, 'refrank' => 80), "st " => array('change' => 18, 'baserank' => 49, 'refrank' => 67),
- "ne " => array('change' => 8, 'baserank' => 161, 'refrank' => 169), "all" => array('change' => 154, 'baserank' => 241, 'refrank' => 87),
- "vin" => array('change' => 300, 'baserank' => 196, 'refrank' => null), " op" => array('change' => 300, 'baserank' => 219, 'refrank' => null),
- "chi" => array('change' => 107, 'baserank' => 36, 'refrank' => 143), "e w" => array('change' => 197, 'baserank' => 293, 'refrank' => 96),
- " ro" => array('change' => 300, 'baserank' => 113, 'refrank' => null), "act" => array('change' => 300, 'baserank' => 237, 'refrank' => null),
- "d r" => array('change' => 300, 'baserank' => 280, 'refrank' => null), "nt " => array('change' => 11, 'baserank' => 82, 'refrank' => 71),
- "can" => array('change' => 0, 'baserank' => 264, 'refrank' => 264), "rea" => array('change' => 300, 'baserank' => 88, 'refrank' => null),
- "ssa" => array('change' => 300, 'baserank' => 22, 'refrank' => null), " fo" => array('change' => 47, 'baserank' => 104, 'refrank' => 57),
- "eas" => array('change' => 300, 'baserank' => 296, 'refrank' => null), "mic" => array('change' => 300, 'baserank' => 157, 'refrank' => null),
- "cul" => array('change' => 300, 'baserank' => 65, 'refrank' => null), " an" => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
- "n t" => array('change' => 120, 'baserank' => 160, 'refrank' => 40), "arg" => array('change' => 300, 'baserank' => 118, 'refrank' => null),
- " it" => array('change' => 93, 'baserank' => 15, 'refrank' => 108), "ebi" => array('change' => 300, 'baserank' => 297, 'refrank' => null),
- " re" => array('change' => 21, 'baserank' => 4, 'refrank' => 25), "res" => array('change' => 120, 'baserank' => 31, 'refrank' => 151),
- " be" => array('change' => 13, 'baserank' => 33, 'refrank' => 46), "rom" => array('change' => 300, 'baserank' => 89, 'refrank' => null),
- "'s " => array('change' => 175, 'baserank' => 233, 'refrank' => 58), "arc" => array('change' => 300, 'baserank' => 117, 'refrank' => null),
- " su" => array('change' => 119, 'baserank' => 59, 'refrank' => 178), "s p" => array('change' => 300, 'baserank' => 184, 'refrank' => null),
- "ich" => array('change' => 300, 'baserank' => 145, 'refrank' => null), "d d" => array('change' => 300, 'baserank' => 275, 'refrank' => null),
- "cal" => array('change' => 70, 'baserank' => 263, 'refrank' => 193), "ci " => array('change' => 300, 'baserank' => 266, 'refrank' => null),
- "ssi" => array('change' => 300, 'baserank' => 186, 'refrank' => null), "bes" => array('change' => 300, 'baserank' => 120, 'refrank' => null),
- "des" => array('change' => 300, 'baserank' => 285, 'refrank' => null), "e s" => array('change' => 91, 'baserank' => 129, 'refrank' => 38),
- "ch " => array('change' => 111, 'baserank' => 26, 'refrank' => 137), "san" => array('change' => 300, 'baserank' => 14, 'refrank' => null),
- "asi" => array('change' => 300, 'baserank' => 249, 'refrank' => null), "ajo" => array('change' => 300, 'baserank' => 240, 'refrank' => null),
- "ase" => array('change' => 300, 'baserank' => 248, 'refrank' => null), " wa" => array('change' => 181, 'baserank' => 231, 'refrank' => 50),
- "vem" => array('change' => 300, 'baserank' => 195, 'refrank' => null), "ed " => array('change' => 128, 'baserank' => 131, 'refrank' => 3),
- "ant" => array('change' => 191, 'baserank' => 64, 'refrank' => 255), "a p" => array('change' => 300, 'baserank' => 235, 'refrank' => null),
- "lor" => array('change' => 300, 'baserank' => 155, 'refrank' => null), "kno" => array('change' => 300, 'baserank' => 151, 'refrank' => null),
- "ais" => array('change' => 300, 'baserank' => 16, 'refrank' => null), " pe" => array('change' => 300, 'baserank' => 24, 'refrank' => null),
- "or " => array('change' => 51, 'baserank' => 85, 'refrank' => 34), "e i" => array('change' => 19, 'baserank' => 37, 'refrank' => 56),
- " sp" => array('change' => 300, 'baserank' => 225, 'refrank' => null), "ad " => array('change' => 123, 'baserank' => 238, 'refrank' => 115),
- " kn" => array('change' => 300, 'baserank' => 108, 'refrank' => null), "ega" => array('change' => 300, 'baserank' => 132, 'refrank' => null),
- " ba" => array('change' => 46, 'baserank' => 202, 'refrank' => 248), "d t" => array('change' => 261, 'baserank' => 281, 'refrank' => 20),
- "ork" => array('change' => 300, 'baserank' => 169, 'refrank' => null), "lia" => array('change' => 300, 'baserank' => 78, 'refrank' => null),
- "ard" => array('change' => 300, 'baserank' => 245, 'refrank' => null), "iev" => array('change' => 300, 'baserank' => 146, 'refrank' => null),
- "of " => array('change' => 6, 'baserank' => 8, 'refrank' => 14), " cu" => array('change' => 300, 'baserank' => 57, 'refrank' => null),
- "day" => array('change' => 300, 'baserank' => 284, 'refrank' => null), "cen" => array('change' => 300, 'baserank' => 122, 'refrank' => null),
- "re " => array('change' => 21, 'baserank' => 47, 'refrank' => 26), "ist" => array('change' => 220, 'baserank' => 77, 'refrank' => 297),
- " fl" => array('change' => 300, 'baserank' => 103, 'refrank' => null), "anc" => array('change' => 300, 'baserank' => 17, 'refrank' => null),
- "at " => array('change' => 19, 'baserank' => 35, 'refrank' => 16), "rch" => array('change' => 300, 'baserank' => 177, 'refrank' => null),
- "ang" => array('change' => 300, 'baserank' => 116, 'refrank' => null), " mi" => array('change' => 8, 'baserank' => 217, 'refrank' => 225),
- "y s" => array('change' => 300, 'baserank' => 198, 'refrank' => null), "ca " => array('change' => 300, 'baserank' => 262, 'refrank' => null),
- " ma" => array('change' => 55, 'baserank' => 110, 'refrank' => 55), " lo" => array('change' => 300, 'baserank' => 215, 'refrank' => null),
- "rin" => array('change' => 39, 'baserank' => 180, 'refrank' => 219), " im" => array('change' => 300, 'baserank' => 212, 'refrank' => null),
- " er" => array('change' => 300, 'baserank' => 102, 'refrank' => null), "ce " => array('change' => 103, 'baserank' => 6, 'refrank' => 109),
- "bui" => array('change' => 300, 'baserank' => 260, 'refrank' => null), "lit" => array('change' => 300, 'baserank' => 154, 'refrank' => null),
- "iod" => array('change' => 300, 'baserank' => 148, 'refrank' => null), "ame" => array('change' => 300, 'baserank' => 244, 'refrank' => null),
- "ter" => array('change' => 17, 'baserank' => 51, 'refrank' => 68), "e a" => array('change' => 78, 'baserank' => 126, 'refrank' => 48),
- "f l" => array('change' => 300, 'baserank' => 137, 'refrank' => null), "eri" => array('change' => 162, 'baserank' => 71, 'refrank' => 233),
- "ra " => array('change' => 300, 'baserank' => 175, 'refrank' => null), "ng " => array('change' => 38, 'baserank' => 46, 'refrank' => 8),
- "d i" => array('change' => 50, 'baserank' => 277, 'refrank' => 227), "asa" => array('change' => 300, 'baserank' => 247, 'refrank' => null),
- "wn " => array('change' => 300, 'baserank' => 197, 'refrank' => null), " at" => array('change' => 4, 'baserank' => 201, 'refrank' => 197),
- "now" => array('change' => 300, 'baserank' => 163, 'refrank' => null), " by" => array('change' => 133, 'baserank' => 98, 'refrank' => 231),
- "n s" => array('change' => 58, 'baserank' => 159, 'refrank' => 217), " li" => array('change' => 55, 'baserank' => 109, 'refrank' => 164),
- "l a" => array('change' => 300, 'baserank' => 153, 'refrank' => null), "da " => array('change' => 300, 'baserank' => 283, 'refrank' => null),
- "ean" => array('change' => 300, 'baserank' => 295, 'refrank' => null), "tal" => array('change' => 300, 'baserank' => 50, 'refrank' => null),
- "d a" => array('change' => 201, 'baserank' => 274, 'refrank' => 73), "ct " => array('change' => 300, 'baserank' => 272, 'refrank' => null),
- "ali" => array('change' => 226, 'baserank' => 62, 'refrank' => 288), "ian" => array('change' => 300, 'baserank' => 28, 'refrank' => null),
- " sa" => array('change' => 193, 'baserank' => 221, 'refrank' => 28), "do " => array('change' => 300, 'baserank' => 286, 'refrank' => null),
- "t o" => array('change' => 40, 'baserank' => 190, 'refrank' => 230), "ure" => array('change' => 300, 'baserank' => 54, 'refrank' => null),
- "e c" => array('change' => 213, 'baserank' => 289, 'refrank' => 76), "ing" => array('change' => 35, 'baserank' => 42, 'refrank' => 7),
- "d o" => array('change' => 63, 'baserank' => 124, 'refrank' => 187), " ha" => array('change' => 181, 'baserank' => 211, 'refrank' => 30),
- "ts " => array('change' => 33, 'baserank' => 53, 'refrank' => 86), "rth" => array('change' => 300, 'baserank' => 90, 'refrank' => null),
- "cla" => array('change' => 300, 'baserank' => 269, 'refrank' => null), " ac" => array('change' => 300, 'baserank' => 97, 'refrank' => null),
- "th " => array('change' => 55, 'baserank' => 52, 'refrank' => 107), "rio" => array('change' => 300, 'baserank' => 181, 'refrank' => null),
- "al " => array('change' => 7, 'baserank' => 61, 'refrank' => 54), "sto" => array('change' => 84, 'baserank' => 187, 'refrank' => 103),
- "e o" => array('change' => 55, 'baserank' => 38, 'refrank' => 93), "bir" => array('change' => 300, 'baserank' => 259, 'refrank' => null),
- " pr" => array('change' => 48, 'baserank' => 112, 'refrank' => 64), " le" => array('change' => 73, 'baserank' => 214, 'refrank' => 287),
- "nai" => array('change' => 300, 'baserank' => 21, 'refrank' => null), "t i" => array('change' => 15, 'baserank' => 188, 'refrank' => 203),
- " po" => array('change' => 204, 'baserank' => 58, 'refrank' => 262), "f t" => array('change' => 21, 'baserank' => 74, 'refrank' => 95),
- "ban" => array('change' => 300, 'baserank' => 257, 'refrank' => null), "an " => array('change' => 46, 'baserank' => 13, 'refrank' => 59),
- "wor" => array('change' => 300, 'baserank' => 55, 'refrank' => null), "pet" => array('change' => 300, 'baserank' => 172, 'refrank' => null),
- "ael" => array('change' => 300, 'baserank' => 239, 'refrank' => null), "ura" => array('change' => 300, 'baserank' => 194, 'refrank' => null),
- "eve" => array('change' => 11, 'baserank' => 136, 'refrank' => 125), "ion" => array('change' => 53, 'baserank' => 76, 'refrank' => 23),
- "nge" => array('change' => 300, 'baserank' => 162, 'refrank' => null), "cha" => array('change' => 300, 'baserank' => 123, 'refrank' => null),
- "ity" => array('change' => 90, 'baserank' => 150, 'refrank' => 240), " se" => array('change' => 160, 'baserank' => 223, 'refrank' => 63),
- " on" => array('change' => 32, 'baserank' => 111, 'refrank' => 79), "s b" => array('change' => 300, 'baserank' => 91, 'refrank' => null),
- "ans" => array('change' => 300, 'baserank' => 63, 'refrank' => null), "own" => array('change' => 300, 'baserank' => 170, 'refrank' => null),
- " si" => array('change' => 300, 'baserank' => 224, 'refrank' => null), "e r" => array('change' => 165, 'baserank' => 67, 'refrank' => 232),
- "est" => array('change' => 13, 'baserank' => 73, 'refrank' => 60), "hie" => array('change' => 300, 'baserank' => 144, 'refrank' => null),
- "aly" => array('change' => 300, 'baserank' => 243, 'refrank' => null), "and" => array('change' => 1, 'baserank' => 11, 'refrank' => 12),
- "beg" => array('change' => 300, 'baserank' => 119, 'refrank' => null), "dur" => array('change' => 300, 'baserank' => 288, 'refrank' => null),
- "reb" => array('change' => 300, 'baserank' => 178, 'refrank' => null), "e e" => array('change' => 67, 'baserank' => 127, 'refrank' => 194),
- "men" => array('change' => 104, 'baserank' => 156, 'refrank' => 260), " la" => array('change' => 14, 'baserank' => 213, 'refrank' => 199),
- "con" => array('change' => 179, 'baserank' => 271, 'refrank' => 92), " fu" => array('change' => 300, 'baserank' => 210, 'refrank' => null),
- "e l" => array('change' => 26, 'baserank' => 292, 'refrank' => 266), "s a" => array('change' => 7, 'baserank' => 48, 'refrank' => 41),
- "art" => array('change' => 300, 'baserank' => 246, 'refrank' => null), "ltu" => array('change' => 300, 'baserank' => 79, 'refrank' => null),
- "a i" => array('change' => 300, 'baserank' => 115, 'refrank' => null), "ctu" => array('change' => 300, 'baserank' => 273, 'refrank' => null),
- "tor" => array('change' => 68, 'baserank' => 192, 'refrank' => 124), "ach" => array('change' => 300, 'baserank' => 60, 'refrank' => null),
- "d g" => array('change' => 300, 'baserank' => 276, 'refrank' => null), "od " => array('change' => 300, 'baserank' => 166, 'refrank' => null),
- "nte" => array('change' => 1, 'baserank' => 164, 'refrank' => 163), "ena" => array('change' => 300, 'baserank' => 18, 'refrank' => null),
- "d l" => array('change' => 300, 'baserank' => 278, 'refrank' => null), "ene" => array('change' => 300, 'baserank' => 134, 'refrank' => null),
- "e h" => array('change' => 136, 'baserank' => 291, 'refrank' => 155), "era" => array('change' => 211, 'baserank' => 70, 'refrank' => 281),
- "on " => array('change' => 67, 'baserank' => 84, 'refrank' => 17), " ce" => array('change' => 300, 'baserank' => 99, 'refrank' => null),
- "ay " => array('change' => 76, 'baserank' => 256, 'refrank' => 180), " da" => array('change' => 300, 'baserank' => 100, 'refrank' => null),
- "ori" => array('change' => 300, 'baserank' => 87, 'refrank' => null), "atu" => array('change' => 300, 'baserank' => 253, 'refrank' => null),
- "ave" => array('change' => 143, 'baserank' => 254, 'refrank' => 111), "rks" => array('change' => 300, 'baserank' => 182, 'refrank' => null),
- "e d" => array('change' => 62, 'baserank' => 290, 'refrank' => 228), "ns " => array('change' => 3, 'baserank' => 81, 'refrank' => 78),
- " ca" => array('change' => 119, 'baserank' => 203, 'refrank' => 84), "d s" => array('change' => 7, 'baserank' => 125, 'refrank' => 118),
- "uch" => array('change' => 300, 'baserank' => 95, 'refrank' => null), "a v" => array('change' => 300, 'baserank' => 236, 'refrank' => null),
- "nce" => array('change' => 149, 'baserank' => 7, 'refrank' => 156), "his" => array('change' => 48, 'baserank' => 41, 'refrank' => 89),
- "flo" => array('change' => 300, 'baserank' => 138, 'refrank' => null), "ead" => array('change' => 300, 'baserank' => 294, 'refrank' => null),
- " vi" => array('change' => 300, 'baserank' => 230, 'refrank' => null), "me " => array('change' => 109, 'baserank' => 29, 'refrank' => 138),
- "suc" => array('change' => 300, 'baserank' => 93, 'refrank' => null), "e p" => array('change' => 120, 'baserank' => 39, 'refrank' => 159),
- "eci" => array('change' => 300, 'baserank' => 299, 'refrank' => null), "eme" => array('change' => 300, 'baserank' => 133, 'refrank' => null),
- "sen" => array('change' => 300, 'baserank' => 185, 'refrank' => null), "ks " => array('change' => 300, 'baserank' => 152, 'refrank' => null),
- " to" => array('change' => 224, 'baserank' => 228, 'refrank' => 4), " gr" => array('change' => 133, 'baserank' => 105, 'refrank' => 238),
- " ch" => array('change' => 76, 'baserank' => 204, 'refrank' => 128), "ati" => array('change' => 167, 'baserank' => 252, 'refrank' => 85),
- " th" => array('change' => 0, 'baserank' => 0, 'refrank' => 0), " ec" => array('change' => 300, 'baserank' => 206, 'refrank' => null),
- " wo" => array('change' => 115, 'baserank' => 34, 'refrank' => 149), "ope" => array('change' => 300, 'baserank' => 168, 'refrank' => null),
- " a " => array('change' => 180, 'baserank' => 199, 'refrank' => 19), "one" => array('change' => 76, 'baserank' => 167, 'refrank' => 243),
- "n f" => array('change' => 300, 'baserank' => 45, 'refrank' => null), "eat" => array('change' => 300, 'baserank' => 130, 'refrank' => null),
- "ica" => array('change' => 198, 'baserank' => 75, 'refrank' => 273), "inc" => array('change' => 300, 'baserank' => 147, 'refrank' => null),
- "enc" => array('change' => 300, 'baserank' => 69, 'refrank' => null), "ore" => array('change' => 204, 'baserank' => 86, 'refrank' => 290),
- "is " => array('change' => 1, 'baserank' => 43, 'refrank' => 44), " as" => array('change' => 139, 'baserank' => 32, 'refrank' => 171),
- "nts" => array('change' => 300, 'baserank' => 165, 'refrank' => null), "d m" => array('change' => 300, 'baserank' => 279, 'refrank' => null),
- "her" => array('change' => 112, 'baserank' => 143, 'refrank' => 31), " al" => array('change' => 65, 'baserank' => 200, 'refrank' => 135),
- " is" => array('change' => 105, 'baserank' => 107, 'refrank' => 212), "e t" => array('change' => 46, 'baserank' => 68, 'refrank' => 22),
- "c r" => array('change' => 300, 'baserank' => 261, 'refrank' => null), " hi" => array('change' => 45, 'baserank' => 106, 'refrank' => 61),
- "cia" => array('change' => 300, 'baserank' => 267, 'refrank' => null), " fr" => array('change' => 37, 'baserank' => 209, 'refrank' => 172),
- "ult" => array('change' => 300, 'baserank' => 96, 'refrank' => null), "e m" => array('change' => 9, 'baserank' => 128, 'refrank' => 119),
- "ass" => array('change' => 300, 'baserank' => 250, 'refrank' => null), "s o" => array('change' => 2, 'baserank' => 92, 'refrank' => 90),
- "pop" => array('change' => 300, 'baserank' => 173, 'refrank' => null), "nd " => array('change' => 2, 'baserank' => 12, 'refrank' => 10),
- "the" => array('change' => 0, 'baserank' => 1, 'refrank' => 1), " st" => array('change' => 197, 'baserank' => 226, 'refrank' => 29),
- " no" => array('change' => 130, 'baserank' => 218, 'refrank' => 88), "ast" => array('change' => 300, 'baserank' => 251, 'refrank' => null),
- " fi" => array('change' => 300, 'baserank' => 208, 'refrank' => null), "ess" => array('change' => 160, 'baserank' => 135, 'refrank' => 295),
- "gre" => array('change' => 300, 'baserank' => 40, 'refrank' => null), "h a" => array('change' => 300, 'baserank' => 142, 'refrank' => null),
- "duo" => array('change' => 300, 'baserank' => 287, 'refrank' => null), " so" => array('change' => 6, 'baserank' => 114, 'refrank' => 120),
- "es " => array('change' => 48, 'baserank' => 72, 'refrank' => 24), "for" => array('change' => 96, 'baserank' => 139, 'refrank' => 43),
- "gan" => array('change' => 300, 'baserank' => 140, 'refrank' => null), "per" => array('change' => 111, 'baserank' => 171, 'refrank' => 282),
- "thi" => array('change' => 33, 'baserank' => 191, 'refrank' => 224), " of" => array('change' => 6, 'baserank' => 5, 'refrank' => 11),
- " cl" => array('change' => 300, 'baserank' => 205, 'refrank' => null), " sc" => array('change' => 300, 'baserank' => 222, 'refrank' => null),
- "t t" => array('change' => 49, 'baserank' => 94, 'refrank' => 45), "als" => array('change' => 300, 'baserank' => 242, 'refrank' => null),
- "avi" => array('change' => 300, 'baserank' => 255, 'refrank' => null), "cie" => array('change' => 300, 'baserank' => 268, 'refrank' => null),
- " du" => array('change' => 300, 'baserank' => 101, 'refrank' => null), "pre" => array('change' => 105, 'baserank' => 174, 'refrank' => 279),
- "as " => array('change' => 17, 'baserank' => 25, 'refrank' => 42), "a a" => array('change' => 300, 'baserank' => 234, 'refrank' => null),
- "gel" => array('change' => 300, 'baserank' => 141, 'refrank' => null), "ite" => array('change' => 300, 'baserank' => 149, 'refrank' => null),
- "n r" => array('change' => 300, 'baserank' => 30, 'refrank' => null), "by " => array('change' => 105, 'baserank' => 121, 'refrank' => 226),
- "d u" => array('change' => 300, 'baserank' => 282, 'refrank' => null), "clu" => array('change' => 300, 'baserank' => 270, 'refrank' => null),
- " ur" => array('change' => 300, 'baserank' => 229, 'refrank' => null), "ebu" => array('change' => 300, 'baserank' => 298, 'refrank' => null),
- "n i" => array('change' => 300, 'baserank' => 158, 'refrank' => null), "he " => array('change' => 0, 'baserank' => 2, 'refrank' => 2),
- " wh" => array('change' => 195, 'baserank' => 232, 'refrank' => 37), " ph" => array('change' => 300, 'baserank' => 220, 'refrank' => null),
- );
-
- $ranked = $this->x->_arr_rank($this->x->_trigram($str));
- $results = $this->x->detect($str);
-
- $count = count($ranked);
- $sum = 0;
-
- //foreach ($this->x->_lang_db['english'] as $key => $value) {
- foreach ($ranked as $key => $value) {
- if (isset($ranked[$key]) && isset($this->x->_lang_db['english'][$key])) {
- $difference = abs($this->x->_lang_db['english'][$key] - $ranked[$key]);
- } else {
- $difference = 300;
- }
-
- $this->assertTrue(isset($true_differences[$key]), "'$key'");
- if (isset($true_differences[$key])) {
- $this->assertEquals($true_differences[$key]['change'], $difference, "'$key'");
- }
- $sum += $difference;
- }
-
- $this->assertEquals(300, $count);
- $this->assertEquals(59490, $sum);
-
- $this->assertEquals('english', key($results));
- $this->assertEquals(198, floor(current($results)));
- next($results);
- $this->assertEquals('italian', key($results));
- $this->assertEquals(228, floor(current($results)));
- }
-
- /**
-  * Walks a short French sentence through trigram extraction, ranking and
-  * distance scoring in Perl-compatible mode and compares every intermediate
-  * value against a hand-maintained table.
-  */
- function test_french ()
- {
-     $this->x->setPerlCompatible();
-     $text = "Verifions que le détecteur de langues marche";
-
-     $trigrams = $this->x->_trigram($text);
-     // verified in Language::Guess
-     $this->assertEquals(42, count($trigrams));
-
-     $ranked = $this->x->_arr_rank($trigrams);
-     $this->assertEquals(0, $ranked['e l']);
-
-     // expected rank of every trigram of the sample (only its size is asserted below)
-     $correct_ranks = array(
-         ' de' => 1,
-         "éte" => 41,
-         "dét" => 12,
-         'fio' => 18,
-         'de ' => 11,
-         'ons' => 28,
-         'ect' => 14,
-         'le ' => 24,
-         'arc' => 8,
-         'lan' => 23,
-         'es ' => 16,
-         'mar' => 25,
-         " dé" => 2,
-         'ifi' => 21,
-         'gue' => 19,
-         'ur ' => 39,
-         'rch' => 31,
-         'ang' => 7,
-         'que' => 29,
-         'ngu' => 26,
-         'e d' => 13,
-         'rif' => 32,
-         ' ma' => 5,
-         'tec' => 35,
-         'ns ' => 27,
-         ' la' => 3,
-         ' le' => 4,
-         'r d' => 30,
-         'e l' => 0,
-         'che' => 9,
-         's m' => 33,
-         'ue ' => 37,
-         'ver' => 40,
-         'teu' => 36,
-         'eri' => 15,
-         'cte' => 10,
-         'ues' => 38,
-         's q' => 34,
-         'eur' => 17,
-         ' qu' => 6,
-         'he ' => 20,
-         'ion' => 22
-     );
-
-     $this->assertEquals(count($correct_ranks), count($ranked), "different number of trigrams found");
-
-     // per trigram: rank in the sample (baserank), rank in the french model
-     // (refrank, null when the model lacks the trigram) and their distance (change)
-     $distances = array(
-         ' de' => array('change' => 0, 'baserank' => 1, 'refrank' => 1),
-         'éte' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
-         'dét' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
-         'fio' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
-         'de ' => array('change' => 9, 'baserank' => 11, 'refrank' => 2),
-         'ons' => array('change' => 11, 'baserank' => 28, 'refrank' => 39),
-         'ect' => array('change' => 300, 'baserank' => 14, 'refrank' => null),
-         'le ' => array('change' => 19, 'baserank' => 24, 'refrank' => 5),
-         'arc' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
-         'lan' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
-         'es ' => array('change' => 16, 'baserank' => 16, 'refrank' => 0),
-         'mar' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
-         ' dé' => array('change' => 59, 'baserank' => 2, 'refrank' => 61),
-         'ifi' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
-         'gue' => array('change' => 300, 'baserank' => 19, 'refrank' => null),
-         'ur ' => array('change' => 12, 'baserank' => 39, 'refrank' => 27),
-         'rch' => array('change' => 300, 'baserank' => 31, 'refrank' => null),
-         'ang' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
-         'que' => array('change' => 5, 'baserank' => 29, 'refrank' => 24),
-         'ngu' => array('change' => 300, 'baserank' => 26, 'refrank' => null),
-         'e d' => array('change' => 2, 'baserank' => 13, 'refrank' => 15),
-         'rif' => array('change' => 300, 'baserank' => 32, 'refrank' => null),
-         ' ma' => array('change' => 89, 'baserank' => 5, 'refrank' => 94),
-         'tec' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
-         'ns ' => array('change' => 6, 'baserank' => 27, 'refrank' => 21),
-         ' la' => array('change' => 6, 'baserank' => 3, 'refrank' => 9),
-         ' le' => array('change' => 1, 'baserank' => 4, 'refrank' => 3),
-         'r d' => array('change' => 202, 'baserank' => 30, 'refrank' => 232),
-         'e l' => array('change' => 14, 'baserank' => 0, 'refrank' => 14),
-         'che' => array('change' => 300, 'baserank' => 9, 'refrank' => null),
-         's m' => array('change' => 180, 'baserank' => 33, 'refrank' => 213),
-         'ue ' => array('change' => 7, 'baserank' => 37, 'refrank' => 30),
-         'ver' => array('change' => 117, 'baserank' => 40, 'refrank' => 157),
-         'teu' => array('change' => 300, 'baserank' => 36, 'refrank' => null),
-         'eri' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
-         'cte' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
-         'ues' => array('change' => 237, 'baserank' => 38, 'refrank' => 275),
-         's q' => array('change' => 300, 'baserank' => 34, 'refrank' => null),
-         'eur' => array('change' => 56, 'baserank' => 17, 'refrank' => 73),
-         ' qu' => array('change' => 31, 'baserank' => 6, 'refrank' => 37),
-         'he ' => array('change' => 300, 'baserank' => 20, 'refrank' => null),
-         'ion' => array('change' => 12, 'baserank' => 22, 'refrank' => 10),
-     );
-
-     $french_ranks = $this->x->_lang_db['french'];
-
-     $sumchange = 0;
-     foreach ($ranked as $trigram => $rank) {
-         // trigrams missing from the model are charged the flat penalty of 300
-         $difference = isset($french_ranks[$trigram])
-             ? abs($french_ranks[$trigram] - $rank)
-             : 300;
-
-         $listed = isset($distances[$trigram]);
-         $this->assertTrue($listed, $trigram);
-         if ($listed) {
-             $expected = $distances[$trigram];
-             $this->assertEquals($expected['baserank'], $rank, "baserank for $trigram");
-             if ($expected['refrank'] !== null) {
-                 $this->assertEquals($expected['refrank'], $french_ranks[$trigram], "refrank for $trigram");
-             } else {
-                 $this->assertArrayNotHasKey($trigram, $french_ranks);
-             }
-             $this->assertEquals($expected['change'], $difference, "difference for $trigram");
-         }
-
-         $sumchange += $difference;
-     }
-
-     // the library's own distance must agree with the manual sum above
-     $actual_result = $this->x->_distance($french_ranks, $ranked);
-     $this->assertEquals($sumchange, $actual_result);
-     $this->assertEquals(7091, $actual_result);
-     $this->assertEquals(168, floor($sumchange/count($trigrams)));
-
-     $final_result = $this->x->detect($text);
-     $this->assertEquals(168, floor($final_result['french']));
-     $this->assertEquals(211, $final_result['spanish']);
- }
-
- /**
-  * Walks a short Russian sentence through trigram extraction, ranking and
-  * distance scoring in Perl-compatible mode and compares every intermediate
-  * value against a hand-maintained table.
-  */
- function test_russian ()
- {
-     $str = 'авай проверить узнает ли наш угадатель русски язык';
-
-     $this->x->setPerlCompatible();
-     $trigrams = $this->x->_trigram($str);
-     $ranked = $this->x->_arr_rank($trigrams);
-
-     // per trigram: rank in the sample (baserank), rank in the russian model
-     // (refrank, null when the model lacks the trigram) and their distance (change)
-     $correct_ranks = array(
-         ' ру' => array('change' => 300, 'baserank' => 3, 'refrank' => null),
-         'ай ' => array('change' => 300, 'baserank' => 10, 'refrank' => null),
-         'ада' => array('change' => 300, 'baserank' => 8, 'refrank' => null),
-         ' пр' => array('change' => 1, 'baserank' => 2, 'refrank' => 1),
-         ' яз' => array('change' => 300, 'baserank' => 6, 'refrank' => null),
-         'ить' => array('change' => 300, 'baserank' => 24, 'refrank' => null),
-         ' на' => array('change' => 1, 'baserank' => 1, 'refrank' => 0),
-         'зна' => array('change' => 153, 'baserank' => 20, 'refrank' => 173),
-         'вай' => array('change' => 300, 'baserank' => 13, 'refrank' => null),
-         'ш у' => array('change' => 300, 'baserank' => 44, 'refrank' => null),
-         'ль ' => array('change' => 300, 'baserank' => 28, 'refrank' => null),
-         ' ли' => array('change' => 300, 'baserank' => 0, 'refrank' => null),
-         'сск' => array('change' => 300, 'baserank' => 37, 'refrank' => null),
-         'ть ' => array('change' => 31, 'baserank' => 40, 'refrank' => 9),
-         'ава' => array('change' => 300, 'baserank' => 7, 'refrank' => null),
-         'про' => array('change' => 18, 'baserank' => 32, 'refrank' => 14),
-         'гад' => array('change' => 300, 'baserank' => 15, 'refrank' => null),
-         'усс' => array('change' => 300, 'baserank' => 43, 'refrank' => null),
-         'ык ' => array('change' => 300, 'baserank' => 45, 'refrank' => null),
-         'ель' => array('change' => 64, 'baserank' => 17, 'refrank' => 81),
-         'язы' => array('change' => 300, 'baserank' => 47, 'refrank' => null),
-         ' уг' => array('change' => 300, 'baserank' => 4, 'refrank' => null),
-         'ате' => array('change' => 152, 'baserank' => 11, 'refrank' => 163),
-         'и н' => array('change' => 63, 'baserank' => 22, 'refrank' => 85),
-         'и я' => array('change' => 300, 'baserank' => 23, 'refrank' => null),
-         'ает' => array('change' => 152, 'baserank' => 9, 'refrank' => 161),
-         'узн' => array('change' => 300, 'baserank' => 42, 'refrank' => null),
-         'ери' => array('change' => 300, 'baserank' => 18, 'refrank' => null),
-         'ли ' => array('change' => 23, 'baserank' => 27, 'refrank' => 4),
-         'т л' => array('change' => 300, 'baserank' => 38, 'refrank' => null),
-         ' уз' => array('change' => 300, 'baserank' => 5, 'refrank' => null),
-         'дат' => array('change' => 203, 'baserank' => 16, 'refrank' => 219),
-         'зык' => array('change' => 300, 'baserank' => 21, 'refrank' => null),
-         'ров' => array('change' => 59, 'baserank' => 34, 'refrank' => 93),
-         'рит' => array('change' => 300, 'baserank' => 33, 'refrank' => null),
-         'ь р' => array('change' => 300, 'baserank' => 46, 'refrank' => null),
-         'ет ' => array('change' => 19, 'baserank' => 19, 'refrank' => 38),
-         'ки ' => array('change' => 116, 'baserank' => 26, 'refrank' => 142),
-         'рус' => array('change' => 300, 'baserank' => 35, 'refrank' => null),
-         'тел' => array('change' => 16, 'baserank' => 39, 'refrank' => 23),
-         'нае' => array('change' => 300, 'baserank' => 29, 'refrank' => null),
-         'й п' => array('change' => 300, 'baserank' => 25, 'refrank' => null),
-         'наш' => array('change' => 300, 'baserank' => 30, 'refrank' => null),
-         'уга' => array('change' => 300, 'baserank' => 41, 'refrank' => null),
-         'ове' => array('change' => 214, 'baserank' => 31, 'refrank' => 245),
-         'ски' => array('change' => 112, 'baserank' => 36, 'refrank' => 148),
-         'вер' => array('change' => 31, 'baserank' => 14, 'refrank' => 45),
-         'аш ' => array('change' => 300, 'baserank' => 12, 'refrank' => null),
-     );
-
-     $this->assertEquals(48, count($ranked));
-
-     $russian = $this->x->_lang_db['russian'];
-
-     $sumchange = 0;
-     foreach ($ranked as $key => $value) {
-         // trigrams missing from the model are charged the flat penalty of 300
-         if (isset($russian[$key])) {
-             $difference = abs($russian[$key] - $ranked[$key]);
-         } else {
-             $difference = 300;
-         }
-
-         // The trigram is the assertion message, not a second isset() argument:
-         // the closing parenthesis used to sit after $key, which dropped the
-         // message from the failure output.
-         $this->assertTrue(isset($correct_ranks[$key]), $key);
-         if (isset($correct_ranks[$key])) {
-             $this->assertEquals($correct_ranks[$key]['baserank'], $ranked[$key], "baserank for $key");
-             if ($correct_ranks[$key]['refrank'] === null) {
-                 $this->assertArrayNotHasKey($key, $russian);
-             } else {
-                 $this->assertEquals($correct_ranks[$key]['refrank'], $russian[$key], "refrank for $key");
-             }
-             $this->assertEquals($correct_ranks[$key]['change'], $difference, "difference for $key");
-         }
-
-         $sumchange += $difference;
-     }
-
-     // the library's own distance must agree with the manual sum above
-     $actual_result = $this->x->_distance($russian, $ranked);
-     $this->assertEquals($sumchange, $actual_result);
-     $this->assertEquals(10428, $actual_result);
-     $this->assertEquals(217, floor($sumchange/count($trigrams)));
-
-     $final_result = $this->x->detect($str);
-     $this->assertEquals(217,floor($final_result['russian']));
- }
-
- /**
-  * The trigram 's i' must receive rank 0 for the sample 'is it s i'.
-  */
- function test_ranker ()
- {
-     $trigrams = $this->x->_trigram('is it s i');
-     $ranks = $this->x->_arr_rank($trigrams);
-
-     $this->assertEquals(0, $ranks['s i']);
- }
-
-
- /**
-  * getLanguageCount() must match the size of getLanguages(), and every
-  * listed language must be reported as existing.
-  */
- function test_count ()
- {
-     $available = $this->x->getLanguages();
-     $reported = $this->x->getLanguageCount();
-
-     $this->assertEquals(count($available), $reported);
-
-     foreach ($available as $name) {
-         $exists = $this->x->languageExists($name);
-         $this->assertTrue($exists, $name);
-     }
- }
-
- /**
-  * In name mode 2 the code 'en' is known while the long name 'english' is not.
-  */
- function testLanguageExistsNameMode2()
- {
-     $this->x->setNameMode(2);
-
-     $knowsCode = $this->x->languageExists('en');
-     $this->assertTrue($knowsCode);
-
-     $knowsName = $this->x->languageExists('english');
-     $this->assertFalse($knowsName);
- }
-
- /**
-  * In name mode 2 an array argument is accepted only when every code in it is known.
-  */
- function testLanguageExistsArrayNameMode2()
- {
-     $this->x->setNameMode(2);
-
-     $allKnown = array('en', 'de');
-     $this->assertTrue($this->x->languageExists($allKnown));
-
-     $oneUnknown = array('en', 'doesnotexist');
-     $this->assertFalse($this->x->languageExists($oneUnknown));
- }
-
- /**
-  * Passing a float to languageExists() must raise the library exception.
-  *
-  * @expectedException Text_LanguageDetect_Exception
-  * @expectedExceptionMessage Unsupported parameter type passed to languageExists()
-  */
- function testLanguageExistsUnsupportedType()
- {
-     $unsupported = 1.23;
-     $this->x->languageExists($unsupported);
- }
-
- /**
-  * The default language list contains the long names 'english' and 'swedish'.
-  */
- function testGetLanguages()
- {
-     $available = $this->x->getLanguages();
-
-     foreach (array('english', 'swedish') as $expected) {
-         $this->assertContains($expected, $available);
-     }
- }
-
- /**
-  * In name mode 2 the language list contains the codes 'en' and 'sv'.
-  */
- function testGetLanguagesNameMode2()
- {
-     $this->x->setNameMode(2);
-     $available = $this->x->getLanguages();
-
-     foreach (array('en', 'sv') as $expected) {
-         $this->assertContains($expected, $available);
-     }
- }
-
- /**
-  * detect() returns an array with more than five scores whose first key is
-  * 'german' for a German sample sentence.
-  */
- function testDetect()
- {
-     $scores = $this->x->detect('Das ist ein kleiner Text für euch alle');
-     $this->assertInternalType('array', $scores);
-     $this->assertGreaterThan(5, count($scores));
-
-     // each() is deprecated since PHP 7.2 and removed in PHP 8.0;
-     // reset() + key() reads the first entry's key without it.
-     reset($scores);
-     $this->assertEquals('german', key($scores), 'text is german');
- }
-
- /**
-  * In name mode 2 the first key returned by detect() for a German sample is 'de'.
-  */
- function testDetectNameMode2()
- {
-     $this->x->setNameMode(2);
-     $scores = $this->x->detect('Das ist ein kleiner Text für euch alle');
-
-     // each() is deprecated since PHP 7.2 and removed in PHP 8.0;
-     // reset() + key() reads the first entry's key without it.
-     reset($scores);
-     $this->assertEquals('de', key($scores), 'text is german');
- }
-
- /**
-  * In name mode 2, with detect()'s second argument set to 1, the first key
-  * returned for a German sample is still 'de'.
-  */
- function testDetectNameMode2Limit()
- {
-     $this->x->setNameMode(2);
-     $scores = $this->x->detect('Das ist ein kleiner Text für euch alle', 1);
-
-     // each() is deprecated since PHP 7.2 and removed in PHP 8.0;
-     // reset() + key() reads the first entry's key without it.
-     reset($scores);
-     $this->assertEquals('de', key($scores), 'text is german');
- }
-
- /**
-  * detectSimple() returns the string 'german' for a German sample sentence.
-  */
- function testDetectSimple()
- {
-     $sample = 'Das ist ein kleiner Text für euch alle';
-     $detected = $this->x->detectSimple($sample);
-
-     $this->assertInternalType('string', $detected);
-     $this->assertEquals('german', $detected, 'text is german');
- }
-
- /**
-  * In name mode 2 detectSimple() returns the string 'de' for a German sample sentence.
-  */
- function testDetectSimpleNameMode2()
- {
-     $this->x->setNameMode(2);
-
-     $sample = 'Das ist ein kleiner Text für euch alle';
-     $detected = $this->x->detectSimple($sample);
-
-     $this->assertInternalType('string', $detected);
-     $this->assertEquals('de', $detected, 'text is german');
- }
-
- /**
-  * detectSimple() must return null after the two omitLanguages() calls below.
-  *
-  * NOTE(review): presumably the second argument toggles an "include only"
-  * mode, so that the two calls together leave no language enabled -
-  * confirm against omitLanguages().
-  */
- function testDetectSimpleNoLanguages()
- {
-     $this->x->omitLanguages('english', true);
-     $this->x->omitLanguages('english', false);
-
-     $detected = $this->x->detectSimple('Das ist ein kleiner Text für euch alle');
-     $this->assertNull($detected);
- }
-
- /**
-  * Compares language-model similarity scores in both scoring modes and for
-  * the one-argument and no-argument forms of languageSimilarity().
-  */
- function testLanguageSimilarity()
- {
-     // perl-compatible scoring: a lower score means more similar
-     $this->x->setPerlCompatible(true);
-     $eng_dan = $this->x->languageSimilarity('english', 'danish');
-     $nor_dan = $this->x->languageSimilarity('norwegian', 'danish');
-     $swe_dan = $this->x->languageSimilarity('swedish', 'danish');
-
-     $this->assertTrue($nor_dan < $eng_dan); // norwegian is closer to danish than english is
-     $this->assertTrue($swe_dan < $eng_dan); // swedish is closer to danish than english is
-     $this->assertTrue($swe_dan > $nor_dan); // norwegian is closer to danish than swedish is
-
-     // the score must stay within 0..300
-     $this->assertTrue(300 >= $eng_dan, $eng_dan);
-     $this->assertTrue(0 <= $eng_dan, $eng_dan);
-
-     // non-perl-compatible scoring: a higher score means more similar
-     $this->x->setPerlCompatible(false);
-
-     $eng_dan = $this->x->languageSimilarity('english', 'danish');
-     $nor_dan = $this->x->languageSimilarity('norwegian', 'danish');
-     $swe_dan = $this->x->languageSimilarity('swedish', 'danish');
-
-     $this->assertTrue($nor_dan > $eng_dan);
-     $this->assertTrue($swe_dan > $eng_dan);
-     $this->assertTrue($swe_dan < $nor_dan);
-
-     // the score must stay within 0..1
-     $this->assertTrue(1 >= $eng_dan, $eng_dan);
-     $this->assertTrue(0 <= $eng_dan, $eng_dan);
-
-     $this->x->setPerlCompatible(true);
-
-     // one argument: scores against every other language, excluding itself
-     $eng_all = $this->x->languageSimilarity('english');
-     $this->assertEquals($this->x->getLanguageCount() - 1, count($eng_all));
-     $this->assertFalse(isset($eng_all['english']));
-
-     $this->assertTrue($eng_all['turkish'] > $eng_all['italian']);
-     $this->assertTrue($eng_all['kyrgyz'] > $eng_all['french']);
-
-     // no argument: a matrix of all pairs, without self-comparisons
-     $all = $this->x->languageSimilarity();
-     $this->assertFalse(isset($all['english']['english']));
-     $this->assertTrue($all['french']['mongolian'] > $all['french']['spanish']);
-     $this->assertTrue($all['hindi']['finnish'] > $all['spanish']['latin']);
-     $this->assertTrue($all['russian']['english'] > $all['russian']['uzbek']);
- }
-
-
- /**
-  * In name mode 2 (two-letter codes) English must score as less similar to
-  * Danish than Norwegian does, using perl-compatible scoring.
-  */
- function testLanguageSimilarityNameMode2()
- {
-     $this->x->setNameMode(2);
-     $this->x->setPerlCompatible(true);
-
-     // 'da' is the ISO 639-1 code for Danish; 'dk' is the country code for
-     // Denmark and does not name a language.
-     $eng_dan = $this->x->languageSimilarity('en', 'da');
-     $nor_dan = $this->x->languageSimilarity('no', 'da');
-
-     // remember, lower means more similar
-     $this->assertTrue($eng_dan > $nor_dan); // english is less similar to danish than norwegian is
- }
-
- /**
-  * languageSimilarity() returns null when its only argument is an unknown language.
-  */
- function testLanguageSimilarityUnknownLanguage()
- {
-     $similarity = $this->x->languageSimilarity('doesnotexist');
-     $this->assertNull($similarity);
- }
-
- /**
-  * languageSimilarity() returns null when its second argument is an unknown language.
-  */
- function testLanguageSimilarityUnknownLanguage2()
- {
-     $similarity = $this->x->languageSimilarity('english', 'doesnotexist');
-     $this->assertNull($similarity);
- }
-
- /**
-  * detectConfidence() must name English for an English sample in both scoring
-  * modes, with similarity and confidence inside the range each mode allows.
-  */
- function test_compatibility ()
- {
-     $sample = "I am the very model of a modern major general.";
-
-     // non-perl-compatible scoring: both values are expected within 0..1
-     $this->x->setPerlCompatible(false);
-     $report = $this->x->detectConfidence($sample);
-
-     $this->assertTrue($report !== null);
-     $this->assertTrue(is_array($report));
-     $language = $report['language'];
-     $similarity = $report['similarity'];
-     $confidence = $report['confidence'];
-     $this->assertEquals('english', $language);
-     $this->assertTrue($similarity >= 0 && $similarity <= 1, $similarity);
-     $this->assertTrue($confidence >= 0 && $confidence <= 1, $confidence);
-
-     // perl-compatible scoring: similarity is expected within 1..300
-     $this->x->setPerlCompatible(true);
-     $report = $this->x->detectConfidence($sample);
-     $language = $report['language'];
-     $similarity = $report['similarity'];
-     $confidence = $report['confidence'];
-
-     $this->assertEquals('english', $language);
-
-     // technically the lowest possible score is 0 but it's extremely unlikely to hit that
-     $this->assertTrue($similarity >= 1 && $similarity <= 300, $similarity);
-     $this->assertTrue($confidence >= 0 && $confidence <= 1, $confidence);
- }
-
- /**
-  * detectConfidence() returns null for an empty string.
-  */
- function testDetectConfidenceNoText()
- {
-     $report = $this->x->detectConfidence('');
-     $this->assertNull($report);
- }
-
- /**
-  * A fresh detector recognises English; once every language has been
-  * omitted, detectSimple() must return null.
-  */
- function test_omit_error ()
- {
-     $sample = 'On January 29, 1737, Thomas Paine was born in Thetford, England. His father, a corseter, had grand visions for his son, but by the age of 12, Thomas had failed out of school. The young Paine began apprenticing for his father, but again, he failed.';
-
-     $detector = new Text_LanguageDetect();
-
-     $this->assertEquals('english', $detector->detectSimple($sample));
-
-     // omit all languages and you should get an error
-     $everything = $detector->getLanguages();
-     $detector->omitLanguages($everything);
-
-     $afterOmit = $detector->detectSimple($sample);
-     $this->assertNull($afterOmit, gettype($afterOmit));
- }
-
- /**
-  * Tests whether the cyrillic lower-casing works: reading the upper-case and
-  * lower-case alphabets through _next_char() must give the same characters.
-  */
- function test_cyrillic ()
- {
-     $uppercased = 'А Б В Г Д Е Ж З И Й К Л М Н О П'
-         . 'Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я';
-
-     $lowercased = 'а б в г д е ж з и й к л м н о п'
-         . 'р с т у ф х ц ч ш щ ъ ы ь э ю я';
-
-     $upperBytes = strlen($uppercased);
-     $this->assertEquals($upperBytes, strlen($lowercased));
-
-     $upperPos = 0;
-     $lowerPos = 0;
-     $rebuilt = '';
-     // the loop only ends if _next_char() advances the position it is given
-     while ($upperPos < $upperBytes) {
-         $fromUpper = Text_LanguageDetect::_next_char($uppercased, $upperPos, true);
-         $fromLower = Text_LanguageDetect::_next_char($lowercased, $lowerPos, true);
-         $this->assertEquals($fromUpper, $fromLower);
-
-         $rebuilt .= $fromUpper;
-     }
-
-     // both strings must have been consumed completely and in step
-     $this->assertEquals($upperPos, $lowerPos);
-     $this->assertEquals($upperPos, strlen($lowercased));
-     if (function_exists('mb_strtolower')) {
-         $this->assertEquals($rebuilt, mb_strtolower($uppercased, 'UTF-8'));
-     }
- }
-
- /**
-  * Checks the per-block character counts of a mixed-script sample and the
-  * block names reported for single characters across several ranges.
-  */
- function test_block_detection()
- {
-     $exp_output = <<<EOF
-Array
-(
- [Basic Latin] => 37
- [CJK Unified Ideographs] => 2
- [Hiragana] => 1
- [Latin-1 Supplement] => 4
-)
-EOF;
-     $sample = 'lsdkfj あ 葉 叶 slskdfj s Åj;sdklf ÿjs;kdjåf î';
-     $withSpaces = $this->x->detectUnicodeBlocks($sample, false);
-
-     // compare against the print_r() rendering of the sorted counts
-     ksort($withSpaces);
-     $rendered = print_r($withSpaces, true);
-     $this->assertEquals(trim($exp_output), trim($rendered));
-
-     // test whether skipping the spaces reduces the basic latin count
-     $withoutSpaces = $this->x->detectUnicodeBlocks($sample, true);
-     $this->assertTrue($withoutSpaces['Basic Latin'] < $withSpaces['Basic Latin']);
-
-     $this->assertEquals('Cyrillic', $this->x->unicodeBlockName('и'));
-
-     $this->assertEquals('Basic Latin', $this->x->unicodeBlockName('A'));
-
-     // see what happens when you try an unassigned range
-     $unassigned = $this->code2utf(0x0800);
-
-     $this->assertEquals(false, $this->x->unicodeBlockName($unassigned));
-
-     // try unicode vals in several different ranges
-     $unicode = array(
-         'Supplementary Private Use Area-A' => 0xF0001,
-         'Supplementary Private Use Area-B' => 0x100001,
-         'CJK Unified Ideographs Extension B' => 0x20001,
-         'Ugaritic' => 0x10381,
-         'Gothic' => 0x10331,
-         'Low Surrogates' => 0xDC01,
-         'CJK Unified Ideographs' => 0x4E00,
-         'Glagolitic' => 0x2C00,
-         'Latin Extended Additional' => 0x1EFF,
-         'Devanagari' => 0x0900,
-         'Hebrew' => 0x0590,
-         'Latin Extended-B' => 0x024F,
-         'Latin-1 Supplement' => 0x00FF,
-         'Basic Latin' => 0x007F,
-     );
-
-     foreach ($unicode as $range => $codepoint) {
-         $reported = $this->x->unicodeBlockName($this->code2utf($codepoint));
-         $this->assertEquals($range, $reported, $codepoint);
-     }
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Pass a single char only to this method
- */
- function testUnicodeBlockNameParamString()
- {
- $this->x->unicodeBlockName('foo bar baz');
- }
-
- /**
- * @expectedException Text_LanguageDetect_Exception
- * @expectedExceptionMessage Input must be of type string or int
- */
- function testUnicodeBlockNameUnsupportedParamType()
- {
- $this->x->unicodeBlockName(1.23);
- }
-
-
- // utility function
- // found in http://www.php.net/manual/en/function.utf8-encode.php#49336
- function code2utf($num)
- {
- if ($num < 128) {
- return chr($num);
-
- } elseif ($num < 2048) {
- return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
-
- } elseif ($num < 65536) {
- return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
-
- } elseif ($num < 2097152) {
- return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
- } else {
- return '';
- }
- }
-
- function test_utf8len()
- {
- $str = 'Iñtërnâtiônàlizætiøn';
- $this->assertEquals(20, $this->x->utf8strlen($str), utf8_decode($str));
-
- $str = '時期日';
- $this->assertEquals(3, $this->x->utf8strlen($str), utf8_decode($str));
- }
-
- function test_unicode()
- {
- // test whether it can get the right unicode values for utf8 chars
-
- $chars['ת'] = 0x5EA;
-
- $chars['ç'] = 0x00E7;
-
- $chars['a'] = 0x0061;
-
- $chars['Φ'] = 0x03A6;
-
- $chars['И'] = 0x0418;
-
- $chars['ڰ'] = 0x6B0;
-
- $chars['Ụ'] = 0x1EE4;
-
- $chars['놔'] = 0xB194;
-
- $chars['遮'] = 0x906E;
-
- $chars['怀'] = 0x6000;
-
- $chars['ฤ'] = 0x0E24;
-
- $chars['Я'] = 0x042F;
-
- $chars['ü'] = 0x00FC;
-
- $chars['Đ'] = 0x0110;
-
- $chars['א'] = 0x05D0;
-
-
- foreach ($chars as $utf8 => $unicode) {
- $this->assertEquals($unicode, $this->x->_utf8char2unicode($utf8), $utf8);
- }
- }
-
- function test_unicode_off()
- {
-
- // see what happens when you turn the unicode setting off
-
- $myobj = new Text_LanguageDetect;
-
- $str = 'This is a delightful sample of English text';
-
- $myobj->useUnicodeBlocks(true);
- $result1 = $myobj->detectConfidence($str);
-
- $myobj->useUnicodeBlocks(false);
- $result2 = $myobj->detectConfidence($str);
-
- $this->assertEquals($result1, $result2);
-
- // note this test doesn't tell if unicode narrowing was actually used or not
- }
-
-
- function test_detection()
- {
-
- // WARNING: the below lines may make your terminal go ape! be warned
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- // test strings from the test module used by perl's Language::Guess
-
- $testarr = array(
- "english" => "This is a test of the language checker",
- "french" => "Verifions que le détecteur de langues marche",
- "polish" => "Sprawdźmy, czy odgadywacz języków pracuje",
- "russian" => "Давай проверим узнает ли нашь угадыватель русский язык",
- "spanish" => "La respuesta de los acreedores a la oferta argentina para salir del default no ha sido muy positiv",
- "romanian" => "în acest sens aparţinînd Adunării Generale a organizaţiei, în ciuda faptului că mai multe dintre solicitările organizaţiei privind organizarea scrutinului nu au fost soluţionate",
- "albanian" => "kaluan ditën e fundit të fushatës në shtetet kryesore për të siguruar sa më shumë votues.",
- "danish" => "På denne side bringer vi billeder fra de mange forskellige forberedelser til arrangementet, efterhånden som vi får dem ",
- "swedish" => "Vi säger att Frälsningen är en gåva till alla, fritt och för intet. Men som vi nämnt så finns det två villkor som måste",
- "norwegian" => "Nominasjonskomiteen i Akershus KrF har skviset ut Einar Holstad fra stortingslisten. Ytre Enebakk-mannen har plass p Stortinget s lenge Valgerd Svarstad Haugland sitter i",
- "finnish" => "on julkishallinnon verkkopalveluiden yhteinen osoite. Kansalaisten arkielämää helpottavaa tietoa on koottu eri aihealueisiin",
- "estonian" => "Ennetamaks reisil ebameeldivaid vahejuhtumeid vii end kurssi reisidokumentide ja viisade reeglitega ning muu praktilise informatsiooniga",
- "hungarian" => "Hiába jön létre az önkéntes magyar haderő, hiába nem lesz többé bevonulás, változatlanul fennmarad a hadkötelezettség intézménye",
- "uzbek" => "милиция ва уч солиқ идораси ходимлари яраланган. Шаҳарда хавфсизлик чоралари кучайтирилган.",
-
-
- "czech" => "Francouzský ministr financí zmírnil výhrady vůči nízkým firemním daním v nových členských státech EU",
- "dutch" => "Die kritiek was volgens hem bitter hard nodig, omdat Nederland binnen een paar jaar in een soort Belfast zou dreigen te nderen",
-
- "croatian" => "biće prilično izjednačena, sugerišu najnovije ankete. Oba kandidata tvrde da su sposobni da dobiju rat protiv terorizma",
-
- "romanian" => "în acest sens aparţinînd Adunării Generale a organizaţiei, în ciuda faptului că mai multe dintre solicitările organizaţiei ivind organizarea scrutinului nu au fost soluţionate",
-
- "turkish" => "yakın tarihin en çekişmeli başkanlık seçiminde oy verme işlemi sürerken, katılımda rekor bekleniyor.",
-
- "kyrgyz" => "көрбөгөндөй элдик толкундоо болуп, Кокон шаарынын көчөлөрүндө бир нече миң киши нааразылык билдирди.",
-
-
- "albanian" => "kaluan ditën e fundit të fushatës në shtetet kryesore për të siguruar sa më shumë votues.",
-
-
- "azeri" => "Daxil olan xəbərlərdə deyilir ki, 6 nəfər Bağdadın mərkəzində yerləşən Təhsil Nazirliyinin binası yaxınlığında baş vermiş partlayış zamanı həlak olub.",
-
-
- "macedonian" => "на јавното мислење покажуваат дека трката е толку тесна, што се очекува двајцата соперници да ја прекршат традицијата и да се појават и на самиот изборен ден.",
-
-
-
- "kazakh" => "Сайлау нәтижесінде дауыстардың басым бөлігін ел премьер министрі Виктор Янукович пен оның қарсыласы, оппозиция жетекшісі Виктор Ющенко алды.",
-
-
- "bulgarian" => " е готов да даде гаранции, че няма да прави ядрено оръжие, ако му се разреши мирна атомна програма",
-
-
- "arabic" => " ملايين الناخبين الأمريكيين يدلون بأصواتهم وسط إقبال قياسي على انتخابات هي الأشد تنافسا منذ عقود",
-
- );
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- // should be safe at this point
-
-
- $languages = $this->x->getLanguages();
- foreach (array_keys($testarr) as $key) {
- $this->assertTrue(in_array($key, $languages), "$key was not in known languages");
- }
-
- foreach ($testarr as $key=>$value) {
- $this->assertEquals($key, $this->x->detectSimple($value));
- }
- }
-
-
- public function test_convertFromNameMode0()
- {
- $this->assertEquals(
- 'english',
- $this->x->_convertFromNameMode('english')
- );
- }
-
- public function test_convertFromNameMode2String()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(
- 'english',
- $this->x->_convertFromNameMode('en')
- );
- }
-
- public function test_convertFromNameMode3String()
- {
- $this->x->setNameMode(3);
- $this->assertEquals(
- 'english',
- $this->x->_convertFromNameMode('eng')
- );
- }
-
- public function test_convertFromNameMode2ArrayVal()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(
- array('english', 'german'),
- $this->x->_convertFromNameMode(array('en', 'de'))
- );
- }
-
- public function test_convertFromNameMode2ArrayKey()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(
- array('english' => 'foo', 'german' => 'test'),
- $this->x->_convertFromNameMode(
- array('en' => 'foo', 'de' => 'test'),
- true
- )
- );
- }
-
- public function test_convertFromNameMode3ArrayVal()
- {
- $this->x->setNameMode(3);
- $this->assertEquals(
- array('english', 'german'),
- $this->x->_convertFromNameMode(array('eng', 'deu'))
- );
- }
-
- public function test_convertFromNameMode3ArrayKey()
- {
- $this->x->setNameMode(3);
- $this->assertEquals(
- array('english' => 'foo', 'german' => 'test'),
- $this->x->_convertFromNameMode(
- array('eng' => 'foo', 'deu' => 'test'),
- true
- )
- );
- }
-
- public function test_convertToNameMode0()
- {
- $this->assertEquals(
- 'english',
- $this->x->_convertToNameMode('english')
- );
- }
-
- public function test_convertToNameMode2String()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(
- 'en',
- $this->x->_convertToNameMode('english')
- );
- }
-
- public function test_convertToNameMode3String()
- {
- $this->x->setNameMode(3);
- $this->assertEquals(
- 'eng',
- $this->x->_convertToNameMode('english')
- );
- }
-
- public function test_convertToNameMode2ArrayVal()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(
- array('en', 'de'),
- $this->x->_convertToNameMode(array('english', 'german'))
- );
- }
-
- public function test_convertToNameMode2ArrayKey()
- {
- $this->x->setNameMode(2);
- $this->assertEquals(
- array('en' => 'foo', 'de' => 'test'),
- $this->x->_convertToNameMode(
- array('english' => 'foo', 'german' => 'test'),
- true
- )
- );
- }
-
- public function test_convertToNameMode3ArrayVal()
- {
- $this->x->setNameMode(3);
- $this->assertEquals(
- array('eng', 'deu'),
- $this->x->_convertToNameMode(array('english', 'german'))
- );
- }
-
- public function test_convertToNameMode3ArrayKey()
- {
- $this->x->setNameMode(3);
- $this->assertEquals(
- array('eng' => 'foo', 'deu' => 'test'),
- $this->x->_convertToNameMode(
- array('english' => 'foo', 'german' => 'test'),
- true
- )
- );
- }
-}
diff --git a/library/langdet/tests/Text_LanguageDetect_ISO639Test.php b/library/langdet/tests/Text_LanguageDetect_ISO639Test.php
deleted file mode 100644
index e01d715e1..000000000
--- a/library/langdet/tests/Text_LanguageDetect_ISO639Test.php
+++ /dev/null
@@ -1,72 +0,0 @@
-<?php
-set_include_path(
- __DIR__ . '/../' . PATH_SEPARATOR . get_include_path()
-);
-
-require_once 'Text/LanguageDetect/ISO639.php';
-
-class Text_LanguageDetect_ISO639Test extends PHPUnit_Framework_TestCase
-{
- public function testNameToCode2()
- {
- $this->assertEquals(
- 'de',
- Text_LanguageDetect_ISO639::nameToCode2('german')
- );
- }
-
- public function testNameToCode2Fail()
- {
- $this->assertNull(
- Text_LanguageDetect_ISO639::nameToCode2('doesnotexist')
- );
- }
-
- public function testNameToCode3()
- {
- $this->assertEquals(
- 'fra',
- Text_LanguageDetect_ISO639::nameToCode3('french')
- );
- }
-
- public function testNameToCode3Fail()
- {
- $this->assertNull(
- Text_LanguageDetect_ISO639::nameToCode3('doesnotexist')
- );
- }
-
- public function testCode2ToName()
- {
- $this->assertEquals(
- 'english',
- Text_LanguageDetect_ISO639::code2ToName('en')
- );
- }
-
- public function testCode2ToNameFail()
- {
- $this->assertNull(
- Text_LanguageDetect_ISO639::code2ToName('nx')
- );
- }
-
- public function testCode3ToName()
- {
- $this->assertEquals(
- 'romanian',
- Text_LanguageDetect_ISO639::code3ToName('rom')
- );
- }
-
- public function testCode3ToNameFail()
- {
- $this->assertNull(
- Text_LanguageDetect_ISO639::code3ToName('nxx')
- );
- }
-
-}
-
-?> \ No newline at end of file
diff --git a/tests/unit/includes/FeedutilsText.php b/tests/unit/includes/FeedutilsTest.php
index 932a1b3a1..d27df4939 100644
--- a/tests/unit/includes/FeedutilsText.php
+++ b/tests/unit/includes/FeedutilsTest.php
@@ -24,7 +24,9 @@ class FeedutilsTest extends UnitTestCase {
$b = ['attribs' => ['' => [
'rel' => 'rel_value',
'type' => 'type_value',
- 'href' => 'href_value'
+ 'href' => 'href_value',
+ 'length' => 'length_value',
+ 'title' => 'title_value'
]]];
$blink1 = ['link1' => $b];
$bresult[] = $b['attribs'][''];
@@ -37,6 +39,9 @@ class FeedutilsTest extends UnitTestCase {
//Illegal string offset 'attribs'
}*/
+ /**
+ * @uses ::xmlify
+ */
public function test_atom_author() {
$this->assertEquals('', atom_author('', 'nick', 'name', 'uri', 72, 72, 'png', 'photourl'));
@@ -47,7 +52,7 @@ class FeedutilsTest extends UnitTestCase {
<link rel="photo" type="png" media:width="72" media:height="72" href="http://photourl" />
<link rel="avatar" type="png" media:width="72" media:height="72" href="http://photourl" />
<poco:preferredUsername>nick</poco:preferredUsername>
- <poco:displayName>name<poco:displayName>
+ <poco:displayName>name</poco:displayName>
</tag>';
$this->assertXmlStringEqualsXmlString($a, atom_author('tag', 'nick', 'name', 'uri', 72, 72, 'png', 'http://photourl'));
diff --git a/tests/unit/includes/LanguageTest.php b/tests/unit/includes/LanguageTest.php
index 789dbe80b..8bf9ca766 100644
--- a/tests/unit/includes/LanguageTest.php
+++ b/tests/unit/includes/LanguageTest.php
@@ -52,7 +52,6 @@ class LanguageTest extends UnitTestCase {
// Can not unit test detect_language(), therefore test the used library
// only for now to find regressions on library updates.
- require_once('library/langdet/Text/LanguageDetect.php');
$l = new Text_LanguageDetect;
// return 2-letter ISO 639-1 (en) language code
$l->setNameMode(2);
@@ -71,8 +70,8 @@ class LanguageTest extends UnitTestCase {
],
'English' => [
'English is a West Germanic language that was first spoken in early medieval England and is now a global lingua franca.[4][5] Named after the Angles, one of the Germanic tribes that migrated to England, it ultimately derives its name from the Anglia (Angeln) peninsula in the Baltic Sea. It is closely related to the Frisian languages, but its vocabulary has been significantly influenced by other Germanic languages, particularly Norse (a North Germanic language), as well as by Latin and Romance languages, especially French.',
- 'da', // nearly impossible to find a wikipedia article which is detected as english
- 0.000367 // minimum confidence is checked in detect_language(), but that is not yet unit testable
+ 'en',
+ 0.078422
],
'German' => [
'Deutschland ist ein Bundesstaat in Mitteleuropa. Er besteht aus 16 Ländern und ist als freiheitlich-demokratischer und sozialer Rechtsstaat verfasst. Die Bundesrepublik Deutschland stellt die jüngste Ausprägung des deutschen Nationalstaates dar. Mit rund 82,8 Millionen Einwohnern (31. Dezember 2016) zählt Deutschland zu den dicht besiedelten Flächenstaaten.',
diff --git a/vendor/composer/include_paths.php b/vendor/composer/include_paths.php
new file mode 100644
index 000000000..4b72402c9
--- /dev/null
+++ b/vendor/composer/include_paths.php
@@ -0,0 +1,10 @@
+<?php
+
+// include_paths.php @generated by Composer
+
+$vendorDir = dirname(dirname(__FILE__));
+$baseDir = dirname($vendorDir);
+
+return array(
+ $vendorDir . '/pear/text_languagedetect',
+);
diff --git a/vendor/pear/text_languagedetect/README.rst b/vendor/pear/text_languagedetect/README.rst
new file mode 100644
index 000000000..9381c7f7e
--- /dev/null
+++ b/vendor/pear/text_languagedetect/README.rst
@@ -0,0 +1,157 @@
+*******************
+Text_LanguageDetect
+*******************
+PHP library to identify human languages from text samples.
+Returns confidence scores for each.
+
+
+Installation
+============
+
+PEAR
+----
+::
+
+ $ pear install Text_LanguageDetect
+
+Composer
+--------
+::
+
+ $ composer require pear/text_languagedetect
+
+
+Usage
+=====
+Also see the examples in the ``docs/`` directory and
+the `official documentation`__.
+
+__ http://pear.php.net/package/Text_LanguageDetect/docs
+
+Language detection
+------------------
+Simple language detection::
+
+ <?php
+ require_once 'Text/LanguageDetect.php';
+
+ $text = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';
+
+ $ld = new Text_LanguageDetect();
+ $language = $ld->detectSimple($text);
+
+ echo $language;
+ //output: german
+
+Show the three most probable languages with their confidence score::
+
+ <?php
+ require_once 'Text/LanguageDetect.php';
+
+ $text = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?';
+
+ $ld = new Text_LanguageDetect();
+ //3 most probable languages
+ $results = $ld->detect($text, 3);
+
+ foreach ($results as $language => $confidence) {
+ echo $language . ': ' . number_format($confidence, 2) . "\n";
+ }
+
+ //output:
+ //german: 0.35
+ //dutch: 0.25
+ //swedish: 0.20
+ ?>
+
+
+Language code
+-------------
+Instead of returning the full language name, the ISO 639-1 two-letter
+or ISO 639-2 three-letter code can be returned::
+
+ <?php
+ require_once 'Text/LanguageDetect.php';
+ $ld = new Text_LanguageDetect();
+
+ //will output the ISO 639-1 two-letter language code
+ // "de"
+ $ld->setNameMode(2);
+ echo $ld->detectSimple('Das ist ein kleiner Text') . "\n";
+
+ //will output the ISO 639-2 three-letter language code
+ // "deu"
+ $ld->setNameMode(3);
+ echo $ld->detectSimple('Das ist ein kleiner Text') . "\n";
+ ?>
+
+
+Supported languages
+===================
+- albanian
+- arabic
+- azeri
+- bengali
+- bulgarian
+- cebuano
+- croatian
+- czech
+- danish
+- dutch
+- english
+- estonian
+- farsi
+- finnish
+- french
+- german
+- hausa
+- hawaiian
+- hindi
+- hungarian
+- icelandic
+- indonesian
+- italian
+- kazakh
+- kyrgyz
+- latin
+- latvian
+- lithuanian
+- macedonian
+- mongolian
+- nepali
+- norwegian
+- pashto
+- pidgin
+- polish
+- portuguese
+- romanian
+- russian
+- serbian
+- slovak
+- slovene
+- somali
+- spanish
+- swahili
+- swedish
+- tagalog
+- turkish
+- ukrainian
+- urdu
+- uzbek
+- vietnamese
+- welsh
+
+
+Links
+=====
+Homepage
+ http://pear.php.net/package/Text_LanguageDetect
+Bug tracker
+ http://pear.php.net/bugs/search.php?cmd=display&package_name[]=Text_LanguageDetect
+Documentation
+ http://pear.php.net/package/Text_LanguageDetect/docs
+Unit test status
+ https://travis-ci.org/pear/Text_LanguageDetect
+
+ .. image:: https://travis-ci.org/pear/Text_LanguageDetect.svg?branch=master
+ :target: https://travis-ci.org/pear/Text_LanguageDetect
diff --git a/library/langdet/Text/LanguageDetect.php b/vendor/pear/text_languagedetect/Text/LanguageDetect.php
index 7cebbe607..420faa941 100644
--- a/library/langdet/Text/LanguageDetect.php
+++ b/vendor/pear/text_languagedetect/Text/LanguageDetect.php
@@ -1,13 +1,6 @@
<?php
-
/**
- * Detects the language of a given piece of text.
- *
- * Attempts to detect the language of a sample of text by correlating ranked
- * 3-gram frequencies to a table of 3-gram frequencies of known languages.
- *
- * Implements a version of a technique originally proposed by Cavnar & Trenkle
- * (1994): "N-Gram-Based Text Categorization"
+ * Part of Text_LanguageDetect
*
* PHP version 5
*
@@ -15,20 +8,24 @@
* @package Text_LanguageDetect
* @author Nicholas Pisarro <infinityminusnine+pear@gmail.com>
* @copyright 2005-2006 Nicholas Pisarro
- * @license http://www.debian.org/misc/bsd.license BSD
- * @version SVN: $Id: LanguageDetect.php 322353 2012-01-16 08:41:43Z cweiske $
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
* @link http://pear.php.net/package/Text_LanguageDetect/
- * @link http://langdetect.blogspot.com/
*/
-require_once 'Text/LanguageDetect/Exception.php';
-require_once 'Text/LanguageDetect/Parser.php';
-require_once 'Text/LanguageDetect/ISO639.php';
+//require_once 'Text/LanguageDetect/Exception.php';
+//require_once 'Text/LanguageDetect/Parser.php';
+//require_once 'Text/LanguageDetect/ISO639.php';
/**
- * Language detection class
+ * Detects the language of a given piece of text.
+ *
+ * Attempts to detect the language of a sample of text by correlating ranked
+ * 3-gram frequencies to a table of 3-gram frequencies of known languages.
+ *
+ * Implements a version of a technique originally proposed by Cavnar & Trenkle
+ * (1994): "N-Gram-Based Text Categorization"
*
- * Requires the langauge model database (lang.dat) that should have
+ * Requires the language model database (lang.dat) that should have
* accompanied this class definition in order to be instantiated.
*
* Example usage:
@@ -60,10 +57,9 @@ require_once 'Text/LanguageDetect/ISO639.php';
* @package Text_LanguageDetect
* @author Nicholas Pisarro <infinityminusnine+pear@gmail.com>
* @copyright 2005 Nicholas Pisarro
- * @license http://www.debian.org/misc/bsd.license BSD
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
* @version Release: @package_version@
* @link http://pear.php.net/package/Text_LanguageDetect/
- * @todo allow users to generate their own language models
*/
class Text_LanguageDetect
{
@@ -73,10 +69,9 @@ class Text_LanguageDetect
* If this value starts with a slash (/) or a dot (.) the value of
* $this->_data_dir will be ignored
*
- * @var string
- * @access private
+ * @var string
*/
- var $_db_filename = 'lang.dat';
+ protected $_db_filename = 'lang.dat';
/**
* The filename that stores the unicode block definitions
@@ -85,83 +80,74 @@ class Text_LanguageDetect
* $this->_data_dir will be ignored
*
* @var string
- * @access private
*/
- var $_unicode_db_filename = 'unicode_blocks.dat';
+ protected $_unicode_db_filename = 'unicode_blocks.dat';
/**
* The data directory
*
* Should be set by PEAR installer
*
- * @var string
- * @access private
+ * @var string
*/
- var $_data_dir = '@data_dir@';
+ protected $_data_dir = '@data_dir@';
/**
* The trigram data for comparison
*
* Will be loaded on start from $this->_db_filename
*
- * @var array
- * @access private
+ * @var array
*/
- var $_lang_db = array();
+ protected $_lang_db = array();
/**
- * stores the map of the trigram data to unicode characters
+ * Stores the map of the trigram data to unicode characters
*
- * @access private
* @var array
*/
- var $_unicode_map;
+ protected $_unicode_map;
/**
* The size of the trigram data arrays
*
- * @var int
- * @access private
+ * @var int
*/
- var $_threshold = 300;
+ protected $_threshold = 300;
/**
- * the maximum possible score.
+ * The maximum possible score.
*
- * needed for score normalization. Different depending on the
+ * Needed for score normalization. Different depending on the
* perl compatibility setting
*
- * @access private
- * @var int
- * @see setPerlCompatible()
+ * @var int
+ * @see setPerlCompatible()
*/
- var $_max_score = 0;
+ protected $_max_score = 0;
/**
* Whether or not to simulate perl's Language::Guess exactly
*
- * @access private
- * @var bool
- * @see setPerlCompatible()
+ * @var bool
+ * @see setPerlCompatible()
*/
- var $_perl_compatible = false;
+ protected $_perl_compatible = false;
/**
* Whether to use the unicode block detection to speed up processing
*
- * @access private
* @var bool
*/
- var $_use_unicode_narrowing = true;
+ protected $_use_unicode_narrowing = true;
/**
- * stores the result of the clustering operation
+ * Stores the result of the clustering operation
*
- * @access private
- * @var array
- * @see clusterLanguages()
+ * @var array
+ * @see clusterLanguages()
*/
- var $_clusters;
+ protected $_clusters;
/**
* Which type of "language names" are accepted and returned:
@@ -170,7 +156,7 @@ class Text_LanguageDetect
* 2 - 2-letter ISO 639-1 code ("en")
* 3 - 3-letter ISO 639-2 code ("eng")
*/
- var $_name_mode = 0;
+ protected $_name_mode = 0;
/**
* Constructor
@@ -178,7 +164,7 @@ class Text_LanguageDetect
* Will attempt to load the language database. If it fails, you will get
* an exception.
*/
- function __construct()
+ public function __construct()
{
$data = $this->_readdb($this->_db_filename);
$this->_checkTrigram($data['trigram']);
@@ -200,9 +186,8 @@ class Text_LanguageDetect
* @param string $fname File name to load
*
* @return string expected path to the language model database
- * @access private
*/
- function _get_data_loc($fname)
+ protected function _get_data_loc($fname)
{
if ($fname{0} == '/' || $fname{0} == '.') {
// if filename starts with a slash, assume it's an absolute pathname
@@ -229,9 +214,8 @@ class Text_LanguageDetect
*
* @return array the language model data
* @throws Text_LanguageDetect_Exception
- * @access private
*/
- function _readdb($fname)
+ protected function _readdb($fname)
{
// finds the correct data dir
$fname = $this->_get_data_loc($fname);
@@ -259,9 +243,8 @@ class Text_LanguageDetect
* @param array $trigram Trigram data from database
*
* @return void
- * @access private
*/
- function _checkTrigram($trigram)
+ protected function _checkTrigram($trigram)
{
if (!is_array($trigram)) {
if (ini_get('magic_quotes_runtime')) {
@@ -353,11 +336,10 @@ class Text_LanguageDetect
/**
* Returns the number of languages that this object can detect
*
- * @access public
* @return int the number of languages
- * @throws Text_LanguageDetect_Exception
+ * @throws Text_LanguageDetect_Exception
*/
- function getLanguageCount()
+ public function getLanguageCount()
{
return count($this->_lang_db);
}
@@ -395,11 +377,10 @@ class Text_LanguageDetect
/**
* Returns the list of detectable languages
*
- * @access public
* @return array the names of the languages known to this object
- * @throws Text_LanguageDetect_Exception
+ * @throws Text_LanguageDetect_Exception
*/
- function getLanguages()
+ public function getLanguages()
{
return $this->_convertToNameMode(
array_keys($this->_lang_db)
@@ -437,7 +418,7 @@ class Text_LanguageDetect
*
* @return void
*/
- function setNameMode($name_mode)
+ public function setNameMode($name_mode)
{
$this->_name_mode = $name_mode;
}
@@ -467,10 +448,9 @@ class Text_LanguageDetect
* @param string $text text to convert
*
* @return array array of trigram frequencies
- * @access private
* @deprecated Superseded by the Text_LanguageDetect_Parser class
*/
- function _trigram($text)
+ protected function _trigram($text)
{
$s = new Text_LanguageDetect_Parser($text);
$s->prepareTrigram();
@@ -488,9 +468,8 @@ class Text_LanguageDetect
* @param array $arr array of trigram
*
* @return array ranks of trigrams
- * @access protected
*/
- function _arr_rank($arr)
+ protected function _arr_rank($arr)
{
// sorts alphabetically first as a standard way of breaking rank ties
@@ -518,12 +497,11 @@ class Text_LanguageDetect
/**
* Sorts an array by value breaking ties alphabetically
*
- * @param array &$arr the array to sort
+ * @param array $arr the array to sort
*
* @return void
- * @access private
*/
- function _bub_sort(&$arr)
+ protected function _bub_sort(&$arr)
{
// should do the same as this perl statement:
// sort { $trigrams{$b} == $trigrams{$a}
@@ -561,9 +539,8 @@ class Text_LanguageDetect
*
* @return int 1 if $a is greater, -1 if not
* @see _bub_sort()
- * @access private
*/
- function _sort_func($a, $b)
+ protected function _sort_func($a, $b)
{
// each is actually a key/value pair, so that it can compare using both
list($a_key, $a_value) = $a;
@@ -601,9 +578,8 @@ class Text_LanguageDetect
*
* @return int the sum of the differences between the ranks of
* the two trigram sets
- * @access private
*/
- function _distance($arr1, $arr2)
+ protected function _distance($arr1, $arr2)
{
$sumdist = 0;
@@ -634,9 +610,8 @@ class Text_LanguageDetect
*
* @return float the normalized score
* @see _distance()
- * @access private
*/
- function _normalize_score($score, $base_count = null)
+ protected function _normalize_score($score, $base_count = null)
{
if ($base_count === null) {
$base_count = $this->_threshold;
@@ -712,7 +687,7 @@ class Text_LanguageDetect
$sample_obj->setPadStart(!$this->_perl_compatible);
$sample_obj->analyze();
- $trigram_freqs =& $sample_obj->getTrigramRanks();
+ $trigram_freqs = $sample_obj->getTrigramRanks();
$trigram_count = count($trigram_freqs);
if ($trigram_count == 0) {
@@ -723,7 +698,7 @@ class Text_LanguageDetect
// use unicode block detection to narrow down the possibilities
if ($this->_use_unicode_narrowing) {
- $blocks =& $sample_obj->getUnicodeBlocks();
+ $blocks = $sample_obj->getUnicodeBlocks();
if (is_array($blocks)) {
$present_blocks = array_keys($blocks);
@@ -975,9 +950,8 @@ class Text_LanguageDetect
*
* @return mixed Block name, -1 if it failed
* @see unicodeBlockName()
- * @access protected
*/
- function _unicode_block_name($unicode, $blocks, $block_count = -1)
+ protected function _unicode_block_name($unicode, $blocks, $block_count = -1)
{
// for a reference, see
// http://www.unicode.org/Public/UNIDATA/Blocks.txt
@@ -1028,9 +1002,8 @@ class Text_LanguageDetect
*
* @return array the database of unicode block definitions
* @throws Text_LanguageDetect_Exception
- * @access protected
*/
- function _read_unicode_block_db()
+ protected function _read_unicode_block_db()
{
// since the unicode definitions are always going to be the same,
// might as well share the memory for the db with all other instances
@@ -1149,14 +1122,13 @@ class Text_LanguageDetect
* Uses a nearest neighbor technique to generate the maximum possible
* number of dendrograms from the similarity data.
*
- * @access public
- * @return array language cluster data
- * @throws Text_LanguageDetect_Exception
- * @see languageSimilarity()
- * @deprecated this function will eventually be removed and placed into
+ * @return array language cluster data
+ * @throws Text_LanguageDetect_Exception
+ * @see languageSimilarity()
+ * @deprecated this function will eventually be removed and placed into
* the model generation class
*/
- function clusterLanguages()
+ public function clusterLanguages()
{
// todo: set the maximum number of clusters
// return cached result, if any
@@ -1465,7 +1437,7 @@ class Text_LanguageDetect
}
/**
- * ut8-safe strlen()
+ * UTF8-safe strlen()
*
* Returns the numbers of characters (not bytes) in a utf8 string
*
@@ -1489,10 +1461,9 @@ class Text_LanguageDetect
* @param string $char a utf8 (possibly multi-byte) char
*
* @return int unicode value
- * @access protected
* @link http://en.wikipedia.org/wiki/UTF-8
*/
- function _utf8char2unicode($char)
+ protected function _utf8char2unicode($char)
{
// strlen() here will actually get the binary length of a single char
switch (strlen($char)) {
@@ -1529,20 +1500,19 @@ class Text_LanguageDetect
}
/**
- * utf8-safe fast character iterator
+ * UTF8-safe fast character iterator
*
* Will get the next character starting from $counter, which will then be
* incremented. If a multi-byte char the bytes will be concatenated and
* $counter will be incremented by the number of bytes in the char.
*
* @param string $str the string being iterated over
- * @param int &$counter the iterator, will increment by reference
+ * @param int $counter the iterator, will increment by reference
* @param bool $special_convert whether to do special conversions
*
* @return char the next (possibly multi-byte) char from $counter
- * @access private
*/
- static function _next_char($str, &$counter, $special_convert = false)
+ protected static function _next_char($str, &$counter, $special_convert = false)
{
$char = $str{$counter++};
$ord = ord($char);
@@ -1634,7 +1604,7 @@ class Text_LanguageDetect
*
* @return string|array Language name
*/
- function _convertFromNameMode($lang, $convertKey = false)
+ protected function _convertFromNameMode($lang, $convertKey = false)
{
if ($this->_name_mode == 0) {
return $lang;
@@ -1674,7 +1644,7 @@ class Text_LanguageDetect
*
* @return string|array Language name
*/
- function _convertToNameMode($lang, $convertKey = false)
+ protected function _convertToNameMode($lang, $convertKey = false)
{
if ($this->_name_mode == 0) {
return $lang;
diff --git a/library/langdet/Text/LanguageDetect/Exception.php b/vendor/pear/text_languagedetect/Text/LanguageDetect/Exception.php
index 196d994f5..cdbfe13ba 100644
--- a/library/langdet/Text/LanguageDetect/Exception.php
+++ b/vendor/pear/text_languagedetect/Text/LanguageDetect/Exception.php
@@ -1,4 +1,28 @@
<?php
+/**
+ * Part of Text_LanguageDetect
+ *
+ * PHP version 5
+ *
+ * @category Text
+ * @package Text_LanguageDetect
+ * @author Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
+ * @link http://pear.php.net/package/Text_LanguageDetect/
+ */
+
+/**
+ * Part of the PEAR language detection package
+ *
+ * PHP version 5
+ *
+ * @category Text
+ * @package Text_LanguageDetect
+ * @author Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
+ * @link http://pear.php.net/package/Text_LanguageDetect/
+ * @link http://langdetect.blogspot.com/
+ */
class Text_LanguageDetect_Exception extends Exception
{
/**
diff --git a/library/langdet/Text/LanguageDetect/ISO639.php b/vendor/pear/text_languagedetect/Text/LanguageDetect/ISO639.php
index c577a2e1a..9fd76c920 100644
--- a/library/langdet/Text/LanguageDetect/ISO639.php
+++ b/vendor/pear/text_languagedetect/Text/LanguageDetect/ISO639.php
@@ -9,7 +9,6 @@
* @author Christian Weiske <cweiske@php.net>
* @copyright 2011 Christian Weiske <cweiske@php.net>
* @license http://www.debian.org/misc/bsd.license BSD
- * @version SVN: $Id$
* @link http://pear.php.net/package/Text_LanguageDetect/
*/
@@ -23,7 +22,7 @@
* @package Text_LanguageDetect
* @author Christian Weiske <cweiske@php.net>
* @copyright 2011 Christian Weiske <cweiske@php.net>
- * @license http://www.debian.org/misc/bsd.license BSD
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
* @link http://www.loc.gov/standards/iso639-2/php/code_list.php
*/
class Text_LanguageDetect_ISO639
diff --git a/library/langdet/Text/LanguageDetect/Parser.php b/vendor/pear/text_languagedetect/Text/LanguageDetect/Parser.php
index 1c20c2657..3ec177640 100644
--- a/library/langdet/Text/LanguageDetect/Parser.php
+++ b/vendor/pear/text_languagedetect/Text/LanguageDetect/Parser.php
@@ -1,16 +1,15 @@
<?php
-
/**
- * This class represents a text sample to be parsed.
+ * Part of Text_LanguageDetect
+ *
+ * PHP version 5
*
- * @category Text
- * @package Text_LanguageDetect
- * @author Nicholas Pisarro
- * @copyright 2006
- * @license BSD
- * @version CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
- * @link http://pear.php.net/package/Text_LanguageDetect/
- * @link http://langdetect.blogspot.com/
+ * @category Text
+ * @package Text_LanguageDetect
+ * @author Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @copyright 2006 Nicholas Pisarro
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
+ * @link http://pear.php.net/package/Text_LanguageDetect/
*/
/**
@@ -20,99 +19,106 @@
* class. After a new profile has been built, the data can be retrieved using
* the accessor functions.
*
- * This class is intended to be used by the Text_LanguageDetect class, not
+ * This class is intended to be used by the Text_LanguageDetect class, not
* end-users.
*
- * @category Text
- * @package Text_LanguageDetect
- * @author Nicholas Pisarro
- * @copyright 2006
- * @license BSD
- * @version release: 0.3.0
+ * @category Text
+ * @package Text_LanguageDetect
+ * @author Nicholas Pisarro <infinityminusnine+pear@gmail.com>
+ * @copyright 2006 Nicholas Pisarro
+ * @license BSD http://www.opensource.org/licenses/bsd-license.php
+ * @version Release: @package_version@
+ * @link http://pear.php.net/package/Text_LanguageDetect/
*/
class Text_LanguageDetect_Parser extends Text_LanguageDetect
{
/**
- * the piece of text being parsed
+ * The piece of text being parsed
*
- * @access private
- * @var string
+ * @var string
*/
- var $_string;
+ protected $_string;
/**
- * stores the trigram frequencies of the sample
+ * Stores the trigram frequencies of the sample
*
- * @access private
- * @var string
+ * @var array
*/
- var $_trigrams = array();
+ protected $_trigrams = array();
/**
- * stores the trigram ranks of the sample
+ * Stores the trigram ranks of the sample
*
- * @access private
- * @var array
+ * @var array
*/
- var $_trigram_ranks = array();
+ protected $_trigram_ranks = array();
/**
- * stores the unicode blocks of the sample
+ * Stores the unicode blocks of the sample
*
- * @access private
- * @var array
+ * @var array
*/
- var $_unicode_blocks = array();
-
+ protected $_unicode_blocks = array();
+
/**
* Whether the parser should compile the unicode ranges
- *
- * @access private
- * @var bool
+ *
+ * @var bool
*/
- var $_compile_unicode = false;
+ protected $_compile_unicode = false;
/**
* Whether the parser should compile trigrams
*
- * @access private
- * @var bool
+ * @var bool
*/
- var $_compile_trigram = false;
+ protected $_compile_trigram = false;
/**
* Whether the trigram parser should pad the beginning of the string
*
- * @access private
- * @var bool
+ * @var bool
*/
- var $_trigram_pad_start = false;
+ protected $_trigram_pad_start = false;
/**
* Whether the unicode parser should skip non-alphabetical ascii chars
*
- * @access private
- * @var bool
+ * @var bool
*/
- var $_unicode_skip_symbols = true;
+ protected $_unicode_skip_symbols = true;
/**
* Constructor
*
- * @access private
- * @param string $string string to be parsed
+ * @param string $string string to be parsed
*/
- function Text_LanguageDetect_Parser($string) {
+ public function __construct($string)
+ {
$this->_string = $string;
}
/**
+ * PHP 4 constructor for backwards compatibility.
+ *
+ * @param string $string string to be parsed
+ *
+ * @return void
+ */
+ public function Text_LanguageDetect_Parser($string)
+ {
+ self::__construct($string);
+ }
+
+ /**
* Returns true if a string is suitable for parsing
*
- * @param string $str input string to test
- * @return bool true if acceptable, false if not
+ * @param string $str input string to test
+ *
+ * @return bool true if acceptable, false if not
*/
- public static function validateString($str) {
+ public static function validateString($str)
+ {
if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
return true;
} else {
@@ -121,34 +127,37 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
}
/**
- * turn on/off trigram counting
+ * Turn on/off trigram counting
*
- * @access public
- * @param bool $bool true for on, false for off
+ * @param bool $bool true for on, false for off
+ *
+ * @return void
*/
- function prepareTrigram($bool = true)
+ public function prepareTrigram($bool = true)
{
$this->_compile_trigram = $bool;
}
/**
- * turn on/off unicode block counting
+ * Turn on/off unicode block counting
+ *
+ * @param bool $bool true for on, false for off
*
- * @access public
- * @param bool $bool true for on, false for off
+ * @return void
*/
- function prepareUnicode($bool = true)
+ public function prepareUnicode($bool = true)
{
$this->_compile_unicode = $bool;
}
/**
- * turn on/off padding the beginning of the sample string
+ * Turn on/off padding the beginning of the sample string
+ *
+ * @param bool $bool true for on, false for off
*
- * @access public
- * @param bool $bool true for on, false for off
+ * @return void
*/
- function setPadStart($bool = true)
+ public function setPadStart($bool = true)
{
$this->_trigram_pad_start = $bool;
}
@@ -156,10 +165,11 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
/**
* Should the unicode block counter skip non-alphabetical ascii chars?
*
- * @access public
- * @param bool $bool true for on, false for off
+ * @param bool $bool true for on, false for off
+ *
+ * @return void
*/
- function setUnicodeSkipSymbols($bool = true)
+ public function setUnicodeSkipSymbols($bool = true)
{
$this->_unicode_skip_symbols = $bool;
}
@@ -167,10 +177,9 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
/**
* Returns the trigram ranks for the text sample
*
- * @access public
- * @return array trigram ranks in the text sample
+ * @return array Trigram ranks in the text sample
*/
- function &getTrigramRanks()
+ public function getTrigramRanks()
{
return $this->_trigram_ranks;
}
@@ -178,39 +187,37 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
/**
* Return the trigram frequency table
*
- * only used in testing to make sure the parser is working
+ * Only used in testing to make sure the parser is working
*
- * @access public
- * @return array trigram freqencies in the text sample
+ * @return array Trigram frequencies in the text sample
*/
- function &getTrigramFreqs()
+ public function getTrigramFreqs()
{
return $this->_trigram;
}
/**
- * returns the array of unicode blocks
+ * Returns the array of unicode blocks
*
- * @access public
- * @return array unicode blocks in the text sample
+ * @return array Unicode blocks in the text sample
*/
- function &getUnicodeBlocks()
+ public function getUnicodeBlocks()
{
return $this->_unicode_blocks;
}
/**
* Executes the parsing operation
- *
- * Be sure to call the set*() functions to set options and the
+ *
+ * Be sure to call the set*() functions to set options and the
* prepare*() functions first to tell it what kind of data to compute
*
* Afterwards the get*() functions can be used to access the compiled
* information.
*
- * @access public
+ * @return void
*/
- function analyze()
+ public function analyze()
{
$len = strlen($this->_string);
$byte_counter = 0;
@@ -258,9 +265,9 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
if ($this->_compile_trigram) {
if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
if (!isset($this->_trigram[$a . $b . $char])) {
- $this->_trigram[$a . $b . $char] = 1;
+ $this->_trigram[$a . $b . $char] = 1;
} else {
- $this->_trigram[$a . $b . $char]++;
+ $this->_trigram[$a . $b . $char]++;
}
}
@@ -271,10 +278,11 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
// unicode block detection
if ($this->_compile_unicode) {
if ($this->_unicode_skip_symbols
- && strlen($char) == 1
- && ($char < 'A' || $char > 'z'
- || ($char > 'Z' && $char < 'a'))
- && $char != "'") { // does not skip the apostrophe
+ && strlen($char) == 1
+ && ($char < 'A' || $char > 'z'
+ || ($char > 'Z' && $char < 'a'))
+ && $char != "'"
+ ) { // does not skip the apostrophe
// since it's included in the language
// models
@@ -297,7 +305,8 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
if ($this->_compile_unicode) {
foreach ($unicode_chars as $utf8_char => $count) {
$search_result = $this->_unicode_block_name(
- $this->_utf8char2unicode($utf8_char), $blocks, $block_count);
+ $this->_utf8char2unicode($utf8_char), $blocks, $block_count
+ );
if ($search_result != -1) {
$block_name = $search_result[2];
diff --git a/vendor/pear/text_languagedetect/composer.json b/vendor/pear/text_languagedetect/composer.json
new file mode 100644
index 000000000..fc94c6506
--- /dev/null
+++ b/vendor/pear/text_languagedetect/composer.json
@@ -0,0 +1,32 @@
+{
+ "name": "pear/text_languagedetect",
+ "description": "Identify human languages from text samples",
+ "homepage": "http://pear.php.net/package/Text_LanguageDetect",
+ "type": "library",
+ "license": "BSD-2-Clause",
+ "support": {
+ "issues": "http://pear.php.net/bugs/search.php?cmd=display&package_name[]=Text_LanguageDetect",
+ "source": "https://github.com/pear/Text_LanguageDetect"
+ },
+ "authors": [
+ {
+ "email": "taak@php.net",
+ "name": "Nicholas Pisarro",
+ "role": "Lead"
+ }
+ ],
+ "autoload": {
+ "psr-0": {
+ "Text": "./"
+ }
+ },
+ "include-path": [
+ "./"
+ ],
+ "suggest": {
+ "ext-mbstring": "May require the mbstring PHP extension"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "*"
+ }
+}
diff --git a/vendor/pear/text_languagedetect/data/build-unicode_blocks.php b/vendor/pear/text_languagedetect/data/build-unicode_blocks.php
new file mode 100644
index 000000000..afa75a501
--- /dev/null
+++ b/vendor/pear/text_languagedetect/data/build-unicode_blocks.php
@@ -0,0 +1,7 @@
+<?php
+/**
+ * Generate the serialized unicode_blocks.dat file shipped with the package
+ */
+$unicode_blocks = include __DIR__ . '/unicode_blocks.php';
+file_put_contents(__DIR__ . '/unicode_blocks.dat', serialize($unicode_blocks));
+?> \ No newline at end of file
diff --git a/library/langdet/data/lang.dat b/vendor/pear/text_languagedetect/data/lang.dat
index c2a44f56e..c2a44f56e 100644
--- a/library/langdet/data/lang.dat
+++ b/vendor/pear/text_languagedetect/data/lang.dat
diff --git a/vendor/pear/text_languagedetect/data/unicode_blocks.dat b/vendor/pear/text_languagedetect/data/unicode_blocks.dat
new file mode 100644
index 000000000..1f66cac72
--- /dev/null
+++ b/vendor/pear/text_languagedetect/data/unicode_blocks.dat
@@ -0,0 +1 @@
+a:145:{i:0;a:3:{i:0;i:0;i:1;i:127;i:2;s:11:"Basic Latin";}i:1;a:3:{i:0;i:128;i:1;i:255;i:2;s:18:"Latin-1 Supplement";}i:2;a:3:{i:0;i:256;i:1;i:383;i:2;s:16:"Latin Extended-A";}i:3;a:3:{i:0;i:384;i:1;i:591;i:2;s:16:"Latin Extended-B";}i:4;a:3:{i:0;i:592;i:1;i:687;i:2;s:14:"IPA Extensions";}i:5;a:3:{i:0;i:688;i:1;i:767;i:2;s:24:"Spacing Modifier Letters";}i:6;a:3:{i:0;i:768;i:1;i:879;i:2;s:27:"Combining Diacritical Marks";}i:7;a:3:{i:0;i:880;i:1;i:1023;i:2;s:16:"Greek and Coptic";}i:8;a:3:{i:0;i:1024;i:1;i:1279;i:2;s:8:"Cyrillic";}i:9;a:3:{i:0;i:1280;i:1;i:1327;i:2;s:19:"Cyrillic Supplement";}i:10;a:3:{i:0;i:1328;i:1;i:1423;i:2;s:8:"Armenian";}i:11;a:3:{i:0;i:1424;i:1;i:1535;i:2;s:6:"Hebrew";}i:12;a:3:{i:0;i:1536;i:1;i:1791;i:2;s:6:"Arabic";}i:13;a:3:{i:0;i:1792;i:1;i:1871;i:2;s:6:"Syriac";}i:14;a:3:{i:0;i:1872;i:1;i:1919;i:2;s:17:"Arabic Supplement";}i:15;a:3:{i:0;i:1920;i:1;i:1983;i:2;s:6:"Thaana";}i:16;a:3:{i:0;i:2304;i:1;i:2431;i:2;s:10:"Devanagari";}i:17;a:3:{i:0;i:2432;i:1;i:2559;i:2;s:7:"Bengali";}i:18;a:3:{i:0;i:2560;i:1;i:2687;i:2;s:8:"Gurmukhi";}i:19;a:3:{i:0;i:2688;i:1;i:2815;i:2;s:8:"Gujarati";}i:20;a:3:{i:0;i:2816;i:1;i:2943;i:2;s:5:"Oriya";}i:21;a:3:{i:0;i:2944;i:1;i:3071;i:2;s:5:"Tamil";}i:22;a:3:{i:0;i:3072;i:1;i:3199;i:2;s:6:"Telugu";}i:23;a:3:{i:0;i:3200;i:1;i:3327;i:2;s:7:"Kannada";}i:24;a:3:{i:0;i:3328;i:1;i:3455;i:2;s:9:"Malayalam";}i:25;a:3:{i:0;i:3456;i:1;i:3583;i:2;s:7:"Sinhala";}i:26;a:3:{i:0;i:3584;i:1;i:3711;i:2;s:4:"Thai";}i:27;a:3:{i:0;i:3712;i:1;i:3839;i:2;s:3:"Lao";}i:28;a:3:{i:0;i:3840;i:1;i:4095;i:2;s:7:"Tibetan";}i:29;a:3:{i:0;i:4096;i:1;i:4255;i:2;s:7:"Myanmar";}i:30;a:3:{i:0;i:4256;i:1;i:4351;i:2;s:8:"Georgian";}i:31;a:3:{i:0;i:4352;i:1;i:4607;i:2;s:11:"Hangul Jamo";}i:32;a:3:{i:0;i:4608;i:1;i:4991;i:2;s:8:"Ethiopic";}i:33;a:3:{i:0;i:4992;i:1;i:5023;i:2;s:19:"Ethiopic Supplement";}i:34;a:3:{i:0;i:5024;i:1;i:5119;i:2;s:8:"Cherokee";}i:35;a:3:{i:0;i:5120;i:1;i:5759;i:2;s:37:"Unified Canadian Aboriginal 
Syllabics";}i:36;a:3:{i:0;i:5760;i:1;i:5791;i:2;s:5:"Ogham";}i:37;a:3:{i:0;i:5792;i:1;i:5887;i:2;s:5:"Runic";}i:38;a:3:{i:0;i:5888;i:1;i:5919;i:2;s:7:"Tagalog";}i:39;a:3:{i:0;i:5920;i:1;i:5951;i:2;s:7:"Hanunoo";}i:40;a:3:{i:0;i:5952;i:1;i:5983;i:2;s:5:"Buhid";}i:41;a:3:{i:0;i:5984;i:1;i:6015;i:2;s:8:"Tagbanwa";}i:42;a:3:{i:0;i:6016;i:1;i:6143;i:2;s:5:"Khmer";}i:43;a:3:{i:0;i:6144;i:1;i:6319;i:2;s:9:"Mongolian";}i:44;a:3:{i:0;i:6400;i:1;i:6479;i:2;s:5:"Limbu";}i:45;a:3:{i:0;i:6480;i:1;i:6527;i:2;s:6:"Tai Le";}i:46;a:3:{i:0;i:6528;i:1;i:6623;i:2;s:11:"New Tai Lue";}i:47;a:3:{i:0;i:6624;i:1;i:6655;i:2;s:13:"Khmer Symbols";}i:48;a:3:{i:0;i:6656;i:1;i:6687;i:2;s:8:"Buginese";}i:49;a:3:{i:0;i:7424;i:1;i:7551;i:2;s:19:"Phonetic Extensions";}i:50;a:3:{i:0;i:7552;i:1;i:7615;i:2;s:30:"Phonetic Extensions Supplement";}i:51;a:3:{i:0;i:7616;i:1;i:7679;i:2;s:38:"Combining Diacritical Marks Supplement";}i:52;a:3:{i:0;i:7680;i:1;i:7935;i:2;s:25:"Latin Extended Additional";}i:53;a:3:{i:0;i:7936;i:1;i:8191;i:2;s:14:"Greek Extended";}i:54;a:3:{i:0;i:8192;i:1;i:8303;i:2;s:19:"General Punctuation";}i:55;a:3:{i:0;i:8304;i:1;i:8351;i:2;s:27:"Superscripts and Subscripts";}i:56;a:3:{i:0;i:8352;i:1;i:8399;i:2;s:16:"Currency Symbols";}i:57;a:3:{i:0;i:8400;i:1;i:8447;i:2;s:39:"Combining Diacritical Marks for Symbols";}i:58;a:3:{i:0;i:8448;i:1;i:8527;i:2;s:18:"Letterlike Symbols";}i:59;a:3:{i:0;i:8528;i:1;i:8591;i:2;s:12:"Number Forms";}i:60;a:3:{i:0;i:8592;i:1;i:8703;i:2;s:6:"Arrows";}i:61;a:3:{i:0;i:8704;i:1;i:8959;i:2;s:22:"Mathematical Operators";}i:62;a:3:{i:0;i:8960;i:1;i:9215;i:2;s:23:"Miscellaneous Technical";}i:63;a:3:{i:0;i:9216;i:1;i:9279;i:2;s:16:"Control Pictures";}i:64;a:3:{i:0;i:9280;i:1;i:9311;i:2;s:29:"Optical Character Recognition";}i:65;a:3:{i:0;i:9312;i:1;i:9471;i:2;s:22:"Enclosed Alphanumerics";}i:66;a:3:{i:0;i:9472;i:1;i:9599;i:2;s:11:"Box Drawing";}i:67;a:3:{i:0;i:9600;i:1;i:9631;i:2;s:14:"Block Elements";}i:68;a:3:{i:0;i:9632;i:1;i:9727;i:2;s:16:"Geometric 
Shapes";}i:69;a:3:{i:0;i:9728;i:1;i:9983;i:2;s:21:"Miscellaneous Symbols";}i:70;a:3:{i:0;i:9984;i:1;i:10175;i:2;s:8:"Dingbats";}i:71;a:3:{i:0;i:10176;i:1;i:10223;i:2;s:36:"Miscellaneous Mathematical Symbols-A";}i:72;a:3:{i:0;i:10224;i:1;i:10239;i:2;s:21:"Supplemental Arrows-A";}i:73;a:3:{i:0;i:10240;i:1;i:10495;i:2;s:16:"Braille Patterns";}i:74;a:3:{i:0;i:10496;i:1;i:10623;i:2;s:21:"Supplemental Arrows-B";}i:75;a:3:{i:0;i:10624;i:1;i:10751;i:2;s:36:"Miscellaneous Mathematical Symbols-B";}i:76;a:3:{i:0;i:10752;i:1;i:11007;i:2;s:35:"Supplemental Mathematical Operators";}i:77;a:3:{i:0;i:11008;i:1;i:11263;i:2;s:32:"Miscellaneous Symbols and Arrows";}i:78;a:3:{i:0;i:11264;i:1;i:11359;i:2;s:10:"Glagolitic";}i:79;a:3:{i:0;i:11392;i:1;i:11519;i:2;s:6:"Coptic";}i:80;a:3:{i:0;i:11520;i:1;i:11567;i:2;s:19:"Georgian Supplement";}i:81;a:3:{i:0;i:11568;i:1;i:11647;i:2;s:8:"Tifinagh";}i:82;a:3:{i:0;i:11648;i:1;i:11743;i:2;s:17:"Ethiopic Extended";}i:83;a:3:{i:0;i:11776;i:1;i:11903;i:2;s:24:"Supplemental Punctuation";}i:84;a:3:{i:0;i:11904;i:1;i:12031;i:2;s:23:"CJK Radicals Supplement";}i:85;a:3:{i:0;i:12032;i:1;i:12255;i:2;s:15:"Kangxi Radicals";}i:86;a:3:{i:0;i:12272;i:1;i:12287;i:2;s:34:"Ideographic Description Characters";}i:87;a:3:{i:0;i:12288;i:1;i:12351;i:2;s:27:"CJK Symbols and Punctuation";}i:88;a:3:{i:0;i:12352;i:1;i:12447;i:2;s:8:"Hiragana";}i:89;a:3:{i:0;i:12448;i:1;i:12543;i:2;s:8:"Katakana";}i:90;a:3:{i:0;i:12544;i:1;i:12591;i:2;s:8:"Bopomofo";}i:91;a:3:{i:0;i:12592;i:1;i:12687;i:2;s:25:"Hangul Compatibility Jamo";}i:92;a:3:{i:0;i:12688;i:1;i:12703;i:2;s:6:"Kanbun";}i:93;a:3:{i:0;i:12704;i:1;i:12735;i:2;s:17:"Bopomofo Extended";}i:94;a:3:{i:0;i:12736;i:1;i:12783;i:2;s:11:"CJK Strokes";}i:95;a:3:{i:0;i:12784;i:1;i:12799;i:2;s:28:"Katakana Phonetic Extensions";}i:96;a:3:{i:0;i:12800;i:1;i:13055;i:2;s:31:"Enclosed CJK Letters and Months";}i:97;a:3:{i:0;i:13056;i:1;i:13311;i:2;s:17:"CJK Compatibility";}i:98;a:3:{i:0;i:13312;i:1;i:19903;i:2;s:34:"CJK Unified Ideographs 
Extension A";}i:99;a:3:{i:0;i:19904;i:1;i:19967;i:2;s:23:"Yijing Hexagram Symbols";}i:100;a:3:{i:0;i:19968;i:1;i:40959;i:2;s:22:"CJK Unified Ideographs";}i:101;a:3:{i:0;i:40960;i:1;i:42127;i:2;s:12:"Yi Syllables";}i:102;a:3:{i:0;i:42128;i:1;i:42191;i:2;s:11:"Yi Radicals";}i:103;a:3:{i:0;i:42752;i:1;i:42783;i:2;s:21:"Modifier Tone Letters";}i:104;a:3:{i:0;i:43008;i:1;i:43055;i:2;s:12:"Syloti Nagri";}i:105;a:3:{i:0;i:44032;i:1;i:55215;i:2;s:16:"Hangul Syllables";}i:106;a:3:{i:0;i:55296;i:1;i:56191;i:2;s:15:"High Surrogates";}i:107;a:3:{i:0;i:56192;i:1;i:56319;i:2;s:27:"High Private Use Surrogates";}i:108;a:3:{i:0;i:56320;i:1;i:57343;i:2;s:14:"Low Surrogates";}i:109;a:3:{i:0;i:57344;i:1;i:63743;i:2;s:16:"Private Use Area";}i:110;a:3:{i:0;i:63744;i:1;i:64255;i:2;s:28:"CJK Compatibility Ideographs";}i:111;a:3:{i:0;i:64256;i:1;i:64335;i:2;s:29:"Alphabetic Presentation Forms";}i:112;a:3:{i:0;i:64336;i:1;i:65023;i:2;s:27:"Arabic Presentation Forms-A";}i:113;a:3:{i:0;i:65024;i:1;i:65039;i:2;s:19:"Variation Selectors";}i:114;a:3:{i:0;i:65040;i:1;i:65055;i:2;s:14:"Vertical Forms";}i:115;a:3:{i:0;i:65056;i:1;i:65071;i:2;s:20:"Combining Half Marks";}i:116;a:3:{i:0;i:65072;i:1;i:65103;i:2;s:23:"CJK Compatibility Forms";}i:117;a:3:{i:0;i:65104;i:1;i:65135;i:2;s:19:"Small Form Variants";}i:118;a:3:{i:0;i:65136;i:1;i:65279;i:2;s:27:"Arabic Presentation Forms-B";}i:119;a:3:{i:0;i:65280;i:1;i:65519;i:2;s:29:"Halfwidth and Fullwidth Forms";}i:120;a:3:{i:0;i:65520;i:1;i:65535;i:2;s:8:"Specials";}i:121;a:3:{i:0;i:65536;i:1;i:65663;i:2;s:18:"Linear B Syllabary";}i:122;a:3:{i:0;i:65664;i:1;i:65791;i:2;s:18:"Linear B Ideograms";}i:123;a:3:{i:0;i:65792;i:1;i:65855;i:2;s:14:"Aegean Numbers";}i:124;a:3:{i:0;i:65856;i:1;i:65935;i:2;s:21:"Ancient Greek Numbers";}i:125;a:3:{i:0;i:66304;i:1;i:66351;i:2;s:10:"Old Italic";}i:126;a:3:{i:0;i:66352;i:1;i:66383;i:2;s:6:"Gothic";}i:127;a:3:{i:0;i:66432;i:1;i:66463;i:2;s:8:"Ugaritic";}i:128;a:3:{i:0;i:66464;i:1;i:66527;i:2;s:11:"Old 
Persian";}i:129;a:3:{i:0;i:66560;i:1;i:66639;i:2;s:7:"Deseret";}i:130;a:3:{i:0;i:66640;i:1;i:66687;i:2;s:7:"Shavian";}i:131;a:3:{i:0;i:66688;i:1;i:66735;i:2;s:7:"Osmanya";}i:132;a:3:{i:0;i:67584;i:1;i:67647;i:2;s:17:"Cypriot Syllabary";}i:133;a:3:{i:0;i:68096;i:1;i:68191;i:2;s:10:"Kharoshthi";}i:134;a:3:{i:0;i:118784;i:1;i:119039;i:2;s:25:"Byzantine Musical Symbols";}i:135;a:3:{i:0;i:119040;i:1;i:119295;i:2;s:15:"Musical Symbols";}i:136;a:3:{i:0;i:119296;i:1;i:119375;i:2;s:30:"Ancient Greek Musical Notation";}i:137;a:3:{i:0;i:119552;i:1;i:119647;i:2;s:21:"Tai Xuan Jing Symbols";}i:138;a:3:{i:0;i:119808;i:1;i:120831;i:2;s:33:"Mathematical Alphanumeric Symbols";}i:139;a:3:{i:0;i:131072;i:1;i:173791;i:2;s:34:"CJK Unified Ideographs Extension B";}i:140;a:3:{i:0;i:194560;i:1;i:195103;i:2;s:39:"CJK Compatibility Ideographs Supplement";}i:141;a:3:{i:0;i:917504;i:1;i:917631;i:2;s:4:"Tags";}i:142;a:3:{i:0;i:917760;i:1;i:917999;i:2;s:30:"Variation Selectors Supplement";}i:143;a:3:{i:0;i:983040;i:1;i:1048575;i:2;s:32:"Supplementary Private Use Area-A";}i:144;a:3:{i:0;i:1048576;i:1;i:1114111;i:2;s:32:"Supplementary Private Use Area-B";}} \ No newline at end of file
diff --git a/vendor/pear/text_languagedetect/data/unicode_blocks.php b/vendor/pear/text_languagedetect/data/unicode_blocks.php
new file mode 100644
index 000000000..2be6a19d9
--- /dev/null
+++ b/vendor/pear/text_languagedetect/data/unicode_blocks.php
@@ -0,0 +1,874 @@
+<?php
+return array (
+ 0 =>
+ array (
+ 0 => 0x0000,
+ 1 => 0x007F,
+ 2 => 'Basic Latin',
+ ),
+ 1 =>
+ array (
+ 0 => 0x0080,
+ 1 => 0x00FF,
+ 2 => 'Latin-1 Supplement',
+ ),
+ 2 =>
+ array (
+ 0 => 0x0100,
+ 1 => 0x017F,
+ 2 => 'Latin Extended-A',
+ ),
+ 3 =>
+ array (
+ 0 => 0x0180,
+ 1 => 0x024F,
+ 2 => 'Latin Extended-B',
+ ),
+ 4 =>
+ array (
+ 0 => 0x0250,
+ 1 => 0x02AF,
+ 2 => 'IPA Extensions',
+ ),
+ 5 =>
+ array (
+ 0 => 0x02B0,
+ 1 => 0x02FF,
+ 2 => 'Spacing Modifier Letters',
+ ),
+ 6 =>
+ array (
+ 0 => 0x0300,
+ 1 => 0x036F,
+ 2 => 'Combining Diacritical Marks',
+ ),
+ 7 =>
+ array (
+ 0 => 0x0370,
+ 1 => 0x03FF,
+ 2 => 'Greek and Coptic',
+ ),
+ 8 =>
+ array (
+ 0 => 0x0400,
+ 1 => 0x04FF,
+ 2 => 'Cyrillic',
+ ),
+ 9 =>
+ array (
+ 0 => 0x0500,
+ 1 => 0x052F,
+ 2 => 'Cyrillic Supplement',
+ ),
+ 10 =>
+ array (
+ 0 => 0x0530,
+ 1 => 0x058F,
+ 2 => 'Armenian',
+ ),
+ 11 =>
+ array (
+ 0 => 0x0590,
+ 1 => 0x05FF,
+ 2 => 'Hebrew',
+ ),
+ 12 =>
+ array (
+ 0 => 0x0600,
+ 1 => 0x06FF,
+ 2 => 'Arabic',
+ ),
+ 13 =>
+ array (
+ 0 => 0x0700,
+ 1 => 0x074F,
+ 2 => 'Syriac',
+ ),
+ 14 =>
+ array (
+ 0 => 0x0750,
+ 1 => 0x077F,
+ 2 => 'Arabic Supplement',
+ ),
+ 15 =>
+ array (
+ 0 => 0x0780,
+ 1 => 0x07BF,
+ 2 => 'Thaana',
+ ),
+ 16 =>
+ array (
+ 0 => 0x0900,
+ 1 => 0x097F,
+ 2 => 'Devanagari',
+ ),
+ 17 =>
+ array (
+ 0 => 0x0980,
+ 1 => 0x09FF,
+ 2 => 'Bengali',
+ ),
+ 18 =>
+ array (
+ 0 => 0x0A00,
+ 1 => 0x0A7F,
+ 2 => 'Gurmukhi',
+ ),
+ 19 =>
+ array (
+ 0 => 0x0A80,
+ 1 => 0x0AFF,
+ 2 => 'Gujarati',
+ ),
+ 20 =>
+ array (
+ 0 => 0x0B00,
+ 1 => 0x0B7F,
+ 2 => 'Oriya',
+ ),
+ 21 =>
+ array (
+ 0 => 0x0B80,
+ 1 => 0x0BFF,
+ 2 => 'Tamil',
+ ),
+ 22 =>
+ array (
+ 0 => 0x0C00,
+ 1 => 0x0C7F,
+ 2 => 'Telugu',
+ ),
+ 23 =>
+ array (
+ 0 => 0x0C80,
+ 1 => 0x0CFF,
+ 2 => 'Kannada',
+ ),
+ 24 =>
+ array (
+ 0 => 0x0D00,
+ 1 => 0x0D7F,
+ 2 => 'Malayalam',
+ ),
+ 25 =>
+ array (
+ 0 => 0x0D80,
+ 1 => 0x0DFF,
+ 2 => 'Sinhala',
+ ),
+ 26 =>
+ array (
+ 0 => 0x0E00,
+ 1 => 0x0E7F,
+ 2 => 'Thai',
+ ),
+ 27 =>
+ array (
+ 0 => 0x0E80,
+ 1 => 0x0EFF,
+ 2 => 'Lao',
+ ),
+ 28 =>
+ array (
+ 0 => 0x0F00,
+ 1 => 0x0FFF,
+ 2 => 'Tibetan',
+ ),
+ 29 =>
+ array (
+ 0 => 0x1000,
+ 1 => 0x109F,
+ 2 => 'Myanmar',
+ ),
+ 30 =>
+ array (
+ 0 => 0x10A0,
+ 1 => 0x10FF,
+ 2 => 'Georgian',
+ ),
+ 31 =>
+ array (
+ 0 => 0x1100,
+ 1 => 0x11FF,
+ 2 => 'Hangul Jamo',
+ ),
+ 32 =>
+ array (
+ 0 => 0x1200,
+ 1 => 0x137F,
+ 2 => 'Ethiopic',
+ ),
+ 33 =>
+ array (
+ 0 => 0x1380,
+ 1 => 0x139F,
+ 2 => 'Ethiopic Supplement',
+ ),
+ 34 =>
+ array (
+ 0 => 0x13A0,
+ 1 => 0x13FF,
+ 2 => 'Cherokee',
+ ),
+ 35 =>
+ array (
+ 0 => 0x1400,
+ 1 => 0x167F,
+ 2 => 'Unified Canadian Aboriginal Syllabics',
+ ),
+ 36 =>
+ array (
+ 0 => 0x1680,
+ 1 => 0x169F,
+ 2 => 'Ogham',
+ ),
+ 37 =>
+ array (
+ 0 => 0x16A0,
+ 1 => 0x16FF,
+ 2 => 'Runic',
+ ),
+ 38 =>
+ array (
+ 0 => 0x1700,
+ 1 => 0x171F,
+ 2 => 'Tagalog',
+ ),
+ 39 =>
+ array (
+ 0 => 0x1720,
+ 1 => 0x173F,
+ 2 => 'Hanunoo',
+ ),
+ 40 =>
+ array (
+ 0 => 0x1740,
+ 1 => 0x175F,
+ 2 => 'Buhid',
+ ),
+ 41 =>
+ array (
+ 0 => 0x1760,
+ 1 => 0x177F,
+ 2 => 'Tagbanwa',
+ ),
+ 42 =>
+ array (
+ 0 => 0x1780,
+ 1 => 0x17FF,
+ 2 => 'Khmer',
+ ),
+ 43 =>
+ array (
+ 0 => 0x1800,
+ 1 => 0x18AF,
+ 2 => 'Mongolian',
+ ),
+ 44 =>
+ array (
+ 0 => 0x1900,
+ 1 => 0x194F,
+ 2 => 'Limbu',
+ ),
+ 45 =>
+ array (
+ 0 => 0x1950,
+ 1 => 0x197F,
+ 2 => 'Tai Le',
+ ),
+ 46 =>
+ array (
+ 0 => 0x1980,
+ 1 => 0x19DF,
+ 2 => 'New Tai Lue',
+ ),
+ 47 =>
+ array (
+ 0 => 0x19E0,
+ 1 => 0x19FF,
+ 2 => 'Khmer Symbols',
+ ),
+ 48 =>
+ array (
+ 0 => 0x1A00,
+ 1 => 0x1A1F,
+ 2 => 'Buginese',
+ ),
+ 49 =>
+ array (
+ 0 => 0x1D00,
+ 1 => 0x1D7F,
+ 2 => 'Phonetic Extensions',
+ ),
+ 50 =>
+ array (
+ 0 => 0x1D80,
+ 1 => 0x1DBF,
+ 2 => 'Phonetic Extensions Supplement',
+ ),
+ 51 =>
+ array (
+ 0 => 0x1DC0,
+ 1 => 0x1DFF,
+ 2 => 'Combining Diacritical Marks Supplement',
+ ),
+ 52 =>
+ array (
+ 0 => 0x1E00,
+ 1 => 0x1EFF,
+ 2 => 'Latin Extended Additional',
+ ),
+ 53 =>
+ array (
+ 0 => 0x1F00,
+ 1 => 0x1FFF,
+ 2 => 'Greek Extended',
+ ),
+ 54 =>
+ array (
+ 0 => 0x2000,
+ 1 => 0x206F,
+ 2 => 'General Punctuation',
+ ),
+ 55 =>
+ array (
+ 0 => 0x2070,
+ 1 => 0x209F,
+ 2 => 'Superscripts and Subscripts',
+ ),
+ 56 =>
+ array (
+ 0 => 0x20A0,
+ 1 => 0x20CF,
+ 2 => 'Currency Symbols',
+ ),
+ 57 =>
+ array (
+ 0 => 0x20D0,
+ 1 => 0x20FF,
+ 2 => 'Combining Diacritical Marks for Symbols',
+ ),
+ 58 =>
+ array (
+ 0 => 0x2100,
+ 1 => 0x214F,
+ 2 => 'Letterlike Symbols',
+ ),
+ 59 =>
+ array (
+ 0 => 0x2150,
+ 1 => 0x218F,
+ 2 => 'Number Forms',
+ ),
+ 60 =>
+ array (
+ 0 => 0x2190,
+ 1 => 0x21FF,
+ 2 => 'Arrows',
+ ),
+ 61 =>
+ array (
+ 0 => 0x2200,
+ 1 => 0x22FF,
+ 2 => 'Mathematical Operators',
+ ),
+ 62 =>
+ array (
+ 0 => 0x2300,
+ 1 => 0x23FF,
+ 2 => 'Miscellaneous Technical',
+ ),
+ 63 =>
+ array (
+ 0 => 0x2400,
+ 1 => 0x243F,
+ 2 => 'Control Pictures',
+ ),
+ 64 =>
+ array (
+ 0 => 0x2440,
+ 1 => 0x245F,
+ 2 => 'Optical Character Recognition',
+ ),
+ 65 =>
+ array (
+ 0 => 0x2460,
+ 1 => 0x24FF,
+ 2 => 'Enclosed Alphanumerics',
+ ),
+ 66 =>
+ array (
+ 0 => 0x2500,
+ 1 => 0x257F,
+ 2 => 'Box Drawing',
+ ),
+ 67 =>
+ array (
+ 0 => 0x2580,
+ 1 => 0x259F,
+ 2 => 'Block Elements',
+ ),
+ 68 =>
+ array (
+ 0 => 0x25A0,
+ 1 => 0x25FF,
+ 2 => 'Geometric Shapes',
+ ),
+ 69 =>
+ array (
+ 0 => 0x2600,
+ 1 => 0x26FF,
+ 2 => 'Miscellaneous Symbols',
+ ),
+ 70 =>
+ array (
+ 0 => 0x2700,
+ 1 => 0x27BF,
+ 2 => 'Dingbats',
+ ),
+ 71 =>
+ array (
+ 0 => 0x27C0,
+ 1 => 0x27EF,
+ 2 => 'Miscellaneous Mathematical Symbols-A',
+ ),
+ 72 =>
+ array (
+ 0 => 0x27F0,
+ 1 => 0x27FF,
+ 2 => 'Supplemental Arrows-A',
+ ),
+ 73 =>
+ array (
+ 0 => 0x2800,
+ 1 => 0x28FF,
+ 2 => 'Braille Patterns',
+ ),
+ 74 =>
+ array (
+ 0 => 0x2900,
+ 1 => 0x297F,
+ 2 => 'Supplemental Arrows-B',
+ ),
+ 75 =>
+ array (
+ 0 => 0x2980,
+ 1 => 0x29FF,
+ 2 => 'Miscellaneous Mathematical Symbols-B',
+ ),
+ 76 =>
+ array (
+ 0 => 0x2A00,
+ 1 => 0x2AFF,
+ 2 => 'Supplemental Mathematical Operators',
+ ),
+ 77 =>
+ array (
+ 0 => 0x2B00,
+ 1 => 0x2BFF,
+ 2 => 'Miscellaneous Symbols and Arrows',
+ ),
+ 78 =>
+ array (
+ 0 => 0x2C00,
+ 1 => 0x2C5F,
+ 2 => 'Glagolitic',
+ ),
+ 79 =>
+ array (
+ 0 => 0x2C80,
+ 1 => 0x2CFF,
+ 2 => 'Coptic',
+ ),
+ 80 =>
+ array (
+ 0 => 0x2D00,
+ 1 => 0x2D2F,
+ 2 => 'Georgian Supplement',
+ ),
+ 81 =>
+ array (
+ 0 => 0x2D30,
+ 1 => 0x2D7F,
+ 2 => 'Tifinagh',
+ ),
+ 82 =>
+ array (
+ 0 => 0x2D80,
+ 1 => 0x2DDF,
+ 2 => 'Ethiopic Extended',
+ ),
+ 83 =>
+ array (
+ 0 => 0x2E00,
+ 1 => 0x2E7F,
+ 2 => 'Supplemental Punctuation',
+ ),
+ 84 =>
+ array (
+ 0 => 0x2E80,
+ 1 => 0x2EFF,
+ 2 => 'CJK Radicals Supplement',
+ ),
+ 85 =>
+ array (
+ 0 => 0x2F00,
+ 1 => 0x2FDF,
+ 2 => 'Kangxi Radicals',
+ ),
+ 86 =>
+ array (
+ 0 => 0x2FF0,
+ 1 => 0x2FFF,
+ 2 => 'Ideographic Description Characters',
+ ),
+ 87 =>
+ array (
+ 0 => 0x3000,
+ 1 => 0x303F,
+ 2 => 'CJK Symbols and Punctuation',
+ ),
+ 88 =>
+ array (
+ 0 => 0x3040,
+ 1 => 0x309F,
+ 2 => 'Hiragana',
+ ),
+ 89 =>
+ array (
+ 0 => 0x30A0,
+ 1 => 0x30FF,
+ 2 => 'Katakana',
+ ),
+ 90 =>
+ array (
+ 0 => 0x3100,
+ 1 => 0x312F,
+ 2 => 'Bopomofo',
+ ),
+ 91 =>
+ array (
+ 0 => 0x3130,
+ 1 => 0x318F,
+ 2 => 'Hangul Compatibility Jamo',
+ ),
+ 92 =>
+ array (
+ 0 => 0x3190,
+ 1 => 0x319F,
+ 2 => 'Kanbun',
+ ),
+ 93 =>
+ array (
+ 0 => 0x31A0,
+ 1 => 0x31BF,
+ 2 => 'Bopomofo Extended',
+ ),
+ 94 =>
+ array (
+ 0 => 0x31C0,
+ 1 => 0x31EF,
+ 2 => 'CJK Strokes',
+ ),
+ 95 =>
+ array (
+ 0 => 0x31F0,
+ 1 => 0x31FF,
+ 2 => 'Katakana Phonetic Extensions',
+ ),
+ 96 =>
+ array (
+ 0 => 0x3200,
+ 1 => 0x32FF,
+ 2 => 'Enclosed CJK Letters and Months',
+ ),
+ 97 =>
+ array (
+ 0 => 0x3300,
+ 1 => 0x33FF,
+ 2 => 'CJK Compatibility',
+ ),
+ 98 =>
+ array (
+ 0 => 0x3400,
+ 1 => 0x4DBF,
+ 2 => 'CJK Unified Ideographs Extension A',
+ ),
+ 99 =>
+ array (
+ 0 => 0x4DC0,
+ 1 => 0x4DFF,
+ 2 => 'Yijing Hexagram Symbols',
+ ),
+ 100 =>
+ array (
+ 0 => 0x4E00,
+ 1 => 0x9FFF,
+ 2 => 'CJK Unified Ideographs',
+ ),
+ 101 =>
+ array (
+ 0 => 0xA000,
+ 1 => 0xA48F,
+ 2 => 'Yi Syllables',
+ ),
+ 102 =>
+ array (
+ 0 => 0xA490,
+ 1 => 0xA4CF,
+ 2 => 'Yi Radicals',
+ ),
+ 103 =>
+ array (
+ 0 => 0xA700,
+ 1 => 0xA71F,
+ 2 => 'Modifier Tone Letters',
+ ),
+ 104 =>
+ array (
+ 0 => 0xA800,
+ 1 => 0xA82F,
+ 2 => 'Syloti Nagri',
+ ),
+ 105 =>
+ array (
+ 0 => 0xAC00,
+ 1 => 0xD7AF,
+ 2 => 'Hangul Syllables',
+ ),
+ 106 =>
+ array (
+ 0 => 0xD800,
+ 1 => 0xDB7F,
+ 2 => 'High Surrogates',
+ ),
+ 107 =>
+ array (
+ 0 => 0xDB80,
+ 1 => 0xDBFF,
+ 2 => 'High Private Use Surrogates',
+ ),
+ 108 =>
+ array (
+ 0 => 0xDC00,
+ 1 => 0xDFFF,
+ 2 => 'Low Surrogates',
+ ),
+ 109 =>
+ array (
+ 0 => 0xE000,
+ 1 => 0xF8FF,
+ 2 => 'Private Use Area',
+ ),
+ 110 =>
+ array (
+ 0 => 0xF900,
+ 1 => 0xFAFF,
+ 2 => 'CJK Compatibility Ideographs',
+ ),
+ 111 =>
+ array (
+ 0 => 0xFB00,
+ 1 => 0xFB4F,
+ 2 => 'Alphabetic Presentation Forms',
+ ),
+ 112 =>
+ array (
+ 0 => 0xFB50,
+ 1 => 0xFDFF,
+ 2 => 'Arabic Presentation Forms-A',
+ ),
+ 113 =>
+ array (
+ 0 => 0xFE00,
+ 1 => 0xFE0F,
+ 2 => 'Variation Selectors',
+ ),
+ 114 =>
+ array (
+ 0 => 0xFE10,
+ 1 => 0xFE1F,
+ 2 => 'Vertical Forms',
+ ),
+ 115 =>
+ array (
+ 0 => 0xFE20,
+ 1 => 0xFE2F,
+ 2 => 'Combining Half Marks',
+ ),
+ 116 =>
+ array (
+ 0 => 0xFE30,
+ 1 => 0xFE4F,
+ 2 => 'CJK Compatibility Forms',
+ ),
+ 117 =>
+ array (
+ 0 => 0xFE50,
+ 1 => 0xFE6F,
+ 2 => 'Small Form Variants',
+ ),
+ 118 =>
+ array (
+ 0 => 0xFE70,
+ 1 => 0xFEFF,
+ 2 => 'Arabic Presentation Forms-B',
+ ),
+ 119 =>
+ array (
+ 0 => 0xFF00,
+ 1 => 0xFFEF,
+ 2 => 'Halfwidth and Fullwidth Forms',
+ ),
+ 120 =>
+ array (
+ 0 => 0xFFF0,
+ 1 => 0xFFFF,
+ 2 => 'Specials',
+ ),
+ 121 =>
+ array (
+ 0 => 0x10000,
+ 1 => 0x1007F,
+ 2 => 'Linear B Syllabary',
+ ),
+ 122 =>
+ array (
+ 0 => 0x10080,
+ 1 => 0x100FF,
+ 2 => 'Linear B Ideograms',
+ ),
+ 123 =>
+ array (
+ 0 => 0x10100,
+ 1 => 0x1013F,
+ 2 => 'Aegean Numbers',
+ ),
+ 124 =>
+ array (
+ 0 => 0x10140,
+ 1 => 0x1018F,
+ 2 => 'Ancient Greek Numbers',
+ ),
+ 125 =>
+ array (
+ 0 => 0x10300,
+ 1 => 0x1032F,
+ 2 => 'Old Italic',
+ ),
+ 126 =>
+ array (
+ 0 => 0x10330,
+ 1 => 0x1034F,
+ 2 => 'Gothic',
+ ),
+ 127 =>
+ array (
+ 0 => 0x10380,
+ 1 => 0x1039F,
+ 2 => 'Ugaritic',
+ ),
+ 128 =>
+ array (
+ 0 => 0x103A0,
+ 1 => 0x103DF,
+ 2 => 'Old Persian',
+ ),
+ 129 =>
+ array (
+ 0 => 0x10400,
+ 1 => 0x1044F,
+ 2 => 'Deseret',
+ ),
+ 130 =>
+ array (
+ 0 => 0x10450,
+ 1 => 0x1047F,
+ 2 => 'Shavian',
+ ),
+ 131 =>
+ array (
+ 0 => 0x10480,
+ 1 => 0x104AF,
+ 2 => 'Osmanya',
+ ),
+ 132 =>
+ array (
+ 0 => 0x10800,
+ 1 => 0x1083F,
+ 2 => 'Cypriot Syllabary',
+ ),
+ 133 =>
+ array (
+ 0 => 0x10A00,
+ 1 => 0x10A5F,
+ 2 => 'Kharoshthi',
+ ),
+ 134 =>
+ array (
+ 0 => 0x1D000,
+ 1 => 0x1D0FF,
+ 2 => 'Byzantine Musical Symbols',
+ ),
+ 135 =>
+ array (
+ 0 => 0x1D100,
+ 1 => 0x1D1FF,
+ 2 => 'Musical Symbols',
+ ),
+ 136 =>
+ array (
+ 0 => 0x1D200,
+ 1 => 0x1D24F,
+ 2 => 'Ancient Greek Musical Notation',
+ ),
+ 137 =>
+ array (
+ 0 => 0x1D300,
+ 1 => 0x1D35F,
+ 2 => 'Tai Xuan Jing Symbols',
+ ),
+ 138 =>
+ array (
+ 0 => 0x1D400,
+ 1 => 0x1D7FF,
+ 2 => 'Mathematical Alphanumeric Symbols',
+ ),
+ 139 =>
+ array (
+ 0 => 0x20000,
+ 1 => 0x2A6DF,
+ 2 => 'CJK Unified Ideographs Extension B',
+ ),
+ 140 =>
+ array (
+ 0 => 0x2F800,
+ 1 => 0x2FA1F,
+ 2 => 'CJK Compatibility Ideographs Supplement',
+ ),
+ 141 =>
+ array (
+ 0 => 0xE0000,
+ 1 => 0xE007F,
+ 2 => 'Tags',
+ ),
+ 142 =>
+ array (
+ 0 => 0xE0100,
+ 1 => 0xE01EF,
+ 2 => 'Variation Selectors Supplement',
+ ),
+ 143 =>
+ array (
+ 0 => 0xF0000,
+ 1 => 0xFFFFF,
+ 2 => 'Supplementary Private Use Area-A',
+ ),
+ 144 =>
+ array (
+ 0 => 0x100000,
+ 1 => 0x10FFFF,
+ 2 => 'Supplementary Private Use Area-B',
+ ),
+);
+?>
diff --git a/vendor/pear/text_languagedetect/package.xml b/vendor/pear/text_languagedetect/package.xml
new file mode 100644
index 000000000..4fac0051d
--- /dev/null
+++ b/vendor/pear/text_languagedetect/package.xml
@@ -0,0 +1,246 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<package packagerversion="1.7.1" version="2.0" xmlns="http://pear.php.net/dtd/package-2.0" xmlns:tasks="http://pear.php.net/dtd/tasks-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pear.php.net/dtd/tasks-1.0
+http://pear.php.net/dtd/tasks-1.0.xsd
+http://pear.php.net/dtd/package-2.0
+http://pear.php.net/dtd/package-2.0.xsd">
+ <name>Text_LanguageDetect</name>
+ <channel>pear.php.net</channel>
+ <summary>Language detection class</summary>
+ <description>Text_LanguageDetect can identify 52 human languages from text samples and return confidence scores for each.
+ </description>
+
+ <lead>
+ <name>Nicholas Pisarro</name>
+ <user>taak</user>
+ <email>taak@php.net</email>
+ <active>no</active>
+ </lead>
+
+ <date>2017-03-02</date>
+ <time>18:00:42</time>
+ <version>
+ <release>1.0.0</release>
+ <api>1.0.0</api>
+ </version>
+
+ <stability>
+ <release>stable</release>
+ <api>stable</api>
+ </stability>
+
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+
+ <notes>
+ - Add PHP5 constructors
+ - Fix bug #21189: Language detection on PHP7 broken
+ - Fix coding style problems
+ - BC break: Add real visibility to properties and methods
+ </notes>
+
+ <contents>
+ <dir name="/">
+ <file name="README.rst" role="doc" />
+ <dir name="data">
+ <file name="lang.dat" role="data" />
+ <file name="unicode_blocks.dat" role="data" />
+ </dir> <!-- /data -->
+ <dir name="docs">
+ <file name="confidence.php" role="doc" />
+ <file name="example_clui.php" role="doc" />
+ <file name="example_web.php" role="doc" />
+ <file name="iso.php" role="doc" />
+ <file name="languages.php" role="doc" />
+ <file name="simple.php" role="doc" />
+ </dir> <!-- /docs -->
+ <dir name="tests">
+ <file name="PrivProxy.php" role="test" />
+ <file name="Text_LanguageDetectTest.php" role="test" />
+ <file name="Text_LanguageDetect_ISO639Test.php" role="test" />
+ </dir> <!-- /tests -->
+ <dir name="Text">
+ <dir name="LanguageDetect">
+ <file name="Exception.php" role="php">
+ <tasks:replace from="@package_version@" to="version" type="package-info" />
+ </file>
+ <file name="ISO639.php" role="php">
+ <tasks:replace from="@package_version@" to="version" type="package-info" />
+ </file>
+ <file name="Parser.php" role="php">
+ <tasks:replace from="@package_version@" to="version" type="package-info" />
+ </file>
+ </dir> <!-- /Text/LanguageDetect -->
+ <file name="LanguageDetect.php" role="php">
+ <tasks:replace from="@package_version@" to="version" type="package-info" />
+ <tasks:replace from="@data_dir@" to="data_dir" type="pear-config" />
+ </file>
+ </dir> <!-- /Text -->
+ </dir> <!-- / -->
+ </contents>
+
+ <dependencies>
+ <required>
+ <php>
+ <min>5.4</min>
+ </php>
+ <pearinstaller>
+ <min>1.9.0</min>
+ </pearinstaller>
+ <extension>
+ <name>pcre</name>
+ </extension>
+ </required>
+ <optional>
+ <extension>
+ <name>mbstring</name>
+ </extension>
+ </optional>
+ </dependencies>
+
+ <phprelease>
+ <filelist>
+ <install as="lang.dat" name="data/lang.dat" />
+ <install as="unicode_blocks.dat" name="data/unicode_blocks.dat" />
+ </filelist>
+ </phprelease>
+
+ <changelog>
+
+ <release>
+ <version>
+ <release>1.0.0</release>
+ <api>1.0.0</api>
+ </version>
+ <stability>
+ <release>stable</release>
+ <api>stable</api>
+ </stability>
+ <date>2017-03-02</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>
+ - Add PHP5 constructors
+ - Fix bug #21189: Language detection on PHP7 broken
+ - Fix coding style problems
+ - BC break: Add real visibility to properties and methods
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.3.0</release>
+ <api>0.3.0</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2012-01-16</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>
+- BC break: Return lowercase language names
+- BC break: Use exceptions instead of PEAR_Error
+- Implement request #19221: Return ISO 639-1 or ISO 639-2 language codes
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.2.3</release>
+ <api>0.2.3</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2008-07-30</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>updated package requirements
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.2.2</release>
+ <api>0.2.2</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2008-07-30</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>* Fixed Bug #13385
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.2.1</release>
+ <api>0.2.1</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2006-12-03</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>* Fix: Now uses conventionalized include path
+* Fix: Won&apos;t cause error if input is purely symbol-range characters
+* Better error reporting if error in unicode db loading
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.2.0</release>
+ <api>0.2.0</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2006-01-18</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>* Added unicode block range identification
+* unicode block ranges used to optimize language detection by pre-selecting which known language trigram profiles in the database to attempt comparison with
+* Added several utf8/unicode utility functions
+* new Parser class for building data profiles from text samples
+* Fix: mb_convert_encoding() now used correctly
+* Fix: basic case-folding for Cyrillic alphabet implemented
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.1.1</release>
+ <api>0.1.1</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2006-01-06</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>* Output of clustering now cached
+* Fix: better error checking in clustering functions
+* Fix: clusteredSearch() now handles null strings gracefully
+* Compare order is preserved in clusteredSearch() results
+* Slight speed improvement to utf8 iterator
+ </notes>
+ </release>
+
+ <release>
+ <version>
+ <release>0.1.0</release>
+ <api>0.1.0</api>
+ </version>
+ <stability>
+ <release>alpha</release>
+ <api>alpha</api>
+ </stability>
+ <date>2006-01-04</date>
+ <license uri="http://www.opensource.org/licenses/bsd-license.php">BSD</license>
+ <notes>Initial PEAR release
+ </notes>
+ </release>
+ </changelog>
+</package>