From 646dce7765b36afab5c1688c51ebab48a214864e Mon Sep 17 00:00:00 2001 From: Mario Vavti Date: Sat, 22 Aug 2020 20:01:59 +0200 Subject: composer update pear/text_languagedetect --- SBOM.md | 2 +- composer.lock | 12 ++-- vendor/composer/autoload_classmap.php | 2 + vendor/composer/autoload_static.php | 2 + vendor/composer/installed.json | 14 ++--- vendor/pear/text_languagedetect/.gitignore | 6 ++ vendor/pear/text_languagedetect/.travis.yml | 14 +++++ vendor/pear/text_languagedetect/README.rst | 9 +++ .../text_languagedetect/Text/LanguageDetect.php | 42 ++++++------- vendor/pear/text_languagedetect/composer.json | 2 +- .../pear/text_languagedetect/docs/confidence.php | 18 ++++++ .../text_languagedetect/docs/errorhandling.php | 15 +++++ .../pear/text_languagedetect/docs/example_clui.php | 35 +++++++++++ .../pear/text_languagedetect/docs/example_web.php | 72 ++++++++++++++++++++++ vendor/pear/text_languagedetect/docs/iso.php | 19 ++++++ vendor/pear/text_languagedetect/docs/languages.php | 11 ++++ vendor/pear/text_languagedetect/docs/simple.php | 10 +++ vendor/pear/text_languagedetect/phpcs.xml | 10 +++ 18 files changed, 256 insertions(+), 39 deletions(-) create mode 100644 vendor/pear/text_languagedetect/.gitignore create mode 100644 vendor/pear/text_languagedetect/.travis.yml create mode 100644 vendor/pear/text_languagedetect/docs/confidence.php create mode 100644 vendor/pear/text_languagedetect/docs/errorhandling.php create mode 100644 vendor/pear/text_languagedetect/docs/example_clui.php create mode 100644 vendor/pear/text_languagedetect/docs/example_web.php create mode 100644 vendor/pear/text_languagedetect/docs/iso.php create mode 100644 vendor/pear/text_languagedetect/docs/languages.php create mode 100644 vendor/pear/text_languagedetect/docs/simple.php create mode 100644 vendor/pear/text_languagedetect/phpcs.xml diff --git a/SBOM.md b/SBOM.md index e54b4d4ac..bdd0e326f 100644 --- a/SBOM.md +++ b/SBOM.md @@ -11,7 +11,7 @@ |lukasreschke/id3parser|0.0.3.0|GPL|https://github.com/LukasReschke/ID3Parser.git| |michelf/php-markdown|1.9.0.0|BSD-3-Clause|https://github.com/michelf/php-markdown.git| |paragonie/random_compat|9.99.99.0|MIT|https://github.com/paragonie/random_compat.git| -|pear/text_languagedetect|1.0.0.0|BSD-2-Clause|https://github.com/pear/Text_LanguageDetect.git| +|pear/text_languagedetect|1.0.1.0|BSD-2-Clause|https://github.com/pear/Text_LanguageDetect.git| |psr/log|1.1.3.0|MIT|https://github.com/php-fig/log.git| |ramsey/uuid|3.9.3.0|MIT|https://github.com/ramsey/uuid.git| |sabre/dav|4.1.1.0|BSD-3-Clause|https://github.com/sabre-io/dav.git| diff --git a/composer.lock b/composer.lock index 1c8c777fa..11350b8aa 100644 --- a/composer.lock +++ b/composer.lock @@ -472,20 +472,20 @@ }, { "name": "pear/text_languagedetect", - "version": "v1.0.0", + "version": "v1.0.1", "source": { "type": "git", "url": "https://github.com/pear/Text_LanguageDetect.git", - "reference": "bb9ff6f4970f686fac59081e916b456021fe7ba6" + "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/bb9ff6f4970f686fac59081e916b456021fe7ba6", - "reference": "bb9ff6f4970f686fac59081e916b456021fe7ba6", + "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/9e253f26cef9a9066f53f200cc3e0684018cb5b5", + "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5", "shasum": "" }, "require-dev": { - "phpunit/phpunit": "*" + "phpunit/phpunit": "8.*|9.*" }, "suggest": { "ext-mbstring": "May require the mbstring PHP extension" @@ -512,7 +512,7 @@ ], "description": "Identify human languages from text samples", "homepage": "http://pear.php.net/package/Text_LanguageDetect", - "time": "2017-03-02T16:14:08+00:00" + "time": "2020-05-17T12:19:40+00:00" }, { "name": "psr/log", diff --git a/vendor/composer/autoload_classmap.php b/vendor/composer/autoload_classmap.php index 54a5ea11d..104a418da 100644 --- a/vendor/composer/autoload_classmap.php +++ b/vendor/composer/autoload_classmap.php @@ -970,8 +970,10 @@ return array( 'TPC_yyStackEntry' => $vendorDir . '/smarty/smarty/libs/sysplugins/smarty_internal_configfileparser.php', 'TP_yyStackEntry' => $vendorDir . '/smarty/smarty/libs/sysplugins/smarty_internal_templateparser.php', 'Text_LanguageDetect' => $vendorDir . '/pear/text_languagedetect/Text/LanguageDetect.php', + 'Text_LanguageDetectTest' => $vendorDir . '/pear/text_languagedetect/tests/Text_LanguageDetectTest.php', 'Text_LanguageDetect_Exception' => $vendorDir . '/pear/text_languagedetect/Text/LanguageDetect/Exception.php', 'Text_LanguageDetect_ISO639' => $vendorDir . '/pear/text_languagedetect/Text/LanguageDetect/ISO639.php', + 'Text_LanguageDetect_ISO639Test' => $vendorDir . '/pear/text_languagedetect/tests/Text_LanguageDetect_ISO639Test.php', 'Text_LanguageDetect_Parser' => $vendorDir . '/pear/text_languagedetect/Text/LanguageDetect/Parser.php', 'UploadHandler' => $vendorDir . '/blueimp/jquery-file-upload/server/php/UploadHandler.php', 'Zotlabs\\Access\\AccessList' => $baseDir . '/Zotlabs/Access/AccessList.php', diff --git a/vendor/composer/autoload_static.php b/vendor/composer/autoload_static.php index fd44b4266..05a15f615 100644 --- a/vendor/composer/autoload_static.php +++ b/vendor/composer/autoload_static.php @@ -1138,8 +1138,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d 'TPC_yyStackEntry' => __DIR__ . '/..' . '/smarty/smarty/libs/sysplugins/smarty_internal_configfileparser.php', 'TP_yyStackEntry' => __DIR__ . '/..' . '/smarty/smarty/libs/sysplugins/smarty_internal_templateparser.php', 'Text_LanguageDetect' => __DIR__ . '/..' . '/pear/text_languagedetect/Text/LanguageDetect.php', + 'Text_LanguageDetectTest' => __DIR__ . '/..' . '/pear/text_languagedetect/tests/Text_LanguageDetectTest.php', 'Text_LanguageDetect_Exception' => __DIR__ . '/..' . '/pear/text_languagedetect/Text/LanguageDetect/Exception.php', 'Text_LanguageDetect_ISO639' => __DIR__ . '/..' . '/pear/text_languagedetect/Text/LanguageDetect/ISO639.php', + 'Text_LanguageDetect_ISO639Test' => __DIR__ . '/..' . '/pear/text_languagedetect/tests/Text_LanguageDetect_ISO639Test.php', 'Text_LanguageDetect_Parser' => __DIR__ . '/..' . '/pear/text_languagedetect/Text/LanguageDetect/Parser.php', 'UploadHandler' => __DIR__ . '/..' . '/blueimp/jquery-file-upload/server/php/UploadHandler.php', 'Zotlabs\\Access\\AccessList' => __DIR__ . '/../..' . '/Zotlabs/Access/AccessList.php', diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 82936174b..31cb1a05d 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -483,26 +483,26 @@ }, { "name": "pear/text_languagedetect", - "version": "v1.0.0", - "version_normalized": "1.0.0.0", + "version": "v1.0.1", + "version_normalized": "1.0.1.0", "source": { "type": "git", "url": "https://github.com/pear/Text_LanguageDetect.git", - "reference": "bb9ff6f4970f686fac59081e916b456021fe7ba6" + "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/bb9ff6f4970f686fac59081e916b456021fe7ba6", - "reference": "bb9ff6f4970f686fac59081e916b456021fe7ba6", + "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/9e253f26cef9a9066f53f200cc3e0684018cb5b5", + "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5", "shasum": "" }, "require-dev": { - "phpunit/phpunit": "*" + "phpunit/phpunit": "8.*|9.*" }, "suggest": { "ext-mbstring": "May require the mbstring PHP extension" }, - "time": "2017-03-02T16:14:08+00:00", + "time": "2020-05-17T12:19:40+00:00", "type": "library", "installation-source": "dist", "autoload": { diff --git a/vendor/pear/text_languagedetect/.gitignore b/vendor/pear/text_languagedetect/.gitignore new file mode 100644 index 000000000..d83ad3973 --- /dev/null +++ b/vendor/pear/text_languagedetect/.gitignore @@ -0,0 +1,6 @@ +# composer related +composer.lock +composer.phar +vendor +/README.html +/dist diff --git a/vendor/pear/text_languagedetect/.travis.yml b/vendor/pear/text_languagedetect/.travis.yml new file mode 100644 index 000000000..7cb79a61f --- /dev/null +++ b/vendor/pear/text_languagedetect/.travis.yml @@ -0,0 +1,14 @@ +language: php +sudo: false +php: + - 7.2 + - 7.3 + - 7.4 +install: + - pear install pear/PHP_CodeSniffer + - composer install + - phpenv rehash +script: + - composer validate + - ./vendor/bin/phpunit --coverage-text tests + - phpcs Text/ diff --git a/vendor/pear/text_languagedetect/README.rst b/vendor/pear/text_languagedetect/README.rst index 9381c7f7e..15fbd87bb 100644 --- a/vendor/pear/text_languagedetect/README.rst +++ b/vendor/pear/text_languagedetect/README.rst @@ -155,3 +155,12 @@ Unit test status .. image:: https://travis-ci.org/pear/Text_LanguageDetect.svg?branch=master :target: https://travis-ci.org/pear/Text_LanguageDetect + + +Notes +===== +Where are the data from? + + I don't recall where I got the original data set. + It's just the frequencies of 3-letter combinations in each supported language. + It could be generated from a few random wikipedia pages from each language. diff --git a/vendor/pear/text_languagedetect/Text/LanguageDetect.php b/vendor/pear/text_languagedetect/Text/LanguageDetect.php index 420faa941..850b1beaf 100644 --- a/vendor/pear/text_languagedetect/Text/LanguageDetect.php +++ b/vendor/pear/text_languagedetect/Text/LanguageDetect.php @@ -12,9 +12,9 @@ * @link http://pear.php.net/package/Text_LanguageDetect/ */ -//require_once 'Text/LanguageDetect/Exception.php'; -//require_once 'Text/LanguageDetect/Parser.php'; -//require_once 'Text/LanguageDetect/ISO639.php'; +require_once 'Text/LanguageDetect/Exception.php'; +require_once 'Text/LanguageDetect/Parser.php'; +require_once 'Text/LanguageDetect/ISO639.php'; /** * Detects the language of a given piece of text. @@ -189,7 +189,7 @@ class Text_LanguageDetect */ protected function _get_data_loc($fname) { - if ($fname{0} == '/' || $fname{0} == '.') { + if ($fname[0] == '/' || $fname[0] == '.') { // if filename starts with a slash, assume it's an absolute pathname // and skip whatever is in $this->_data_dir return $fname; @@ -247,12 +247,6 @@ class Text_LanguageDetect protected function _checkTrigram($trigram) { if (!is_array($trigram)) { - if (ini_get('magic_quotes_runtime')) { - throw new Text_LanguageDetect_Exception( - 'Error loading database. Try turning magic_quotes_runtime off.', - Text_LanguageDetect_Exception::MAGIC_QUOTES - ); - } throw new Text_LanguageDetect_Exception( 'Language database is not an array.', Text_LanguageDetect_Exception::DB_NOT_ARRAY @@ -1470,31 +1464,31 @@ class Text_LanguageDetect case 1: // normal ASCII-7 byte // 0xxxxxxx --> 0xxxxxxx - return ord($char{0}); + return ord($char[0]); case 2: // 2 byte unicode // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx - $z = (ord($char{0}) & 0x000001F) << 6; - $x = (ord($char{1}) & 0x0000003F); + $z = (ord($char[0]) & 0x000001F) << 6; + $x = (ord($char[1]) & 0x0000003F); return ($z | $x); case 3: // 3 byte unicode // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx - $z = (ord($char{0}) & 0x0000000F) << 12; - $x1 = (ord($char{1}) & 0x0000003F) << 6; - $x2 = (ord($char{2}) & 0x0000003F); + $z = (ord($char[0]) & 0x0000000F) << 12; + $x1 = (ord($char[1]) & 0x0000003F) << 6; + $x2 = (ord($char[2]) & 0x0000003F); return ($z | $x1 | $x2); case 4: // 4 byte unicode // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx --> // 000zzzzz xxxxxxxx xxxxxxxx - $z1 = (ord($char{0}) & 0x00000007) << 18; - $z2 = (ord($char{1}) & 0x0000003F) << 12; - $x1 = (ord($char{2}) & 0x0000003F) << 6; - $x2 = (ord($char{3}) & 0x0000003F); + $z1 = (ord($char[0]) & 0x00000007) << 18; + $z2 = (ord($char[1]) & 0x0000003F) << 12; + $x1 = (ord($char[2]) & 0x0000003F) << 6; + $x2 = (ord($char[3]) & 0x0000003F); return ($z1 | $z2 | $x1 | $x2); } } @@ -1514,7 +1508,7 @@ class Text_LanguageDetect */ protected static function _next_char($str, &$counter, $special_convert = false) { - $char = $str{$counter++}; + $char = $str[$counter++]; $ord = ord($char); // for a description of the utf8 system see @@ -1538,7 +1532,7 @@ class Text_LanguageDetect } elseif ($ord >> 5 == 6) { // two-byte char // multi-byte chars - $nextchar = $str{$counter++}; // get next byte + $nextchar = $str[$counter++]; // get next byte // lower-casing of non-ascii characters is still incomplete @@ -1580,12 +1574,12 @@ class Text_LanguageDetect } elseif ($ord >> 4 == 14) { // three-byte char // tag on next 2 bytes - return $char . $str{$counter++} . $str{$counter++}; + return $char . $str[$counter++] . $str[$counter++]; } elseif ($ord >> 3 == 30) { // four-byte char // tag on next 3 bytes - return $char . $str{$counter++} . $str{$counter++} . $str{$counter++}; + return $char . $str[$counter++] . $str[$counter++] . $str[$counter++]; } else { // error? diff --git a/vendor/pear/text_languagedetect/composer.json b/vendor/pear/text_languagedetect/composer.json index fc94c6506..a65a17d40 100644 --- a/vendor/pear/text_languagedetect/composer.json +++ b/vendor/pear/text_languagedetect/composer.json @@ -27,6 +27,6 @@ "ext-mbstring": "May require the mbstring PHP extension" }, "require-dev": { - "phpunit/phpunit": "*" + "phpunit/phpunit": "8.*|9.*" } } diff --git a/vendor/pear/text_languagedetect/docs/confidence.php b/vendor/pear/text_languagedetect/docs/confidence.php new file mode 100644 index 000000000..5be0fb9b6 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/confidence.php @@ -0,0 +1,18 @@ +detect($text, 3); + +foreach ($results as $language => $confidence) { + echo $language . ': ' . number_format($confidence, 2) . "\n"; +} + +//output: +//german: 0.35 +//dutch: 0.25 +//swedish: 0.20 +?> \ No newline at end of file diff --git a/vendor/pear/text_languagedetect/docs/errorhandling.php b/vendor/pear/text_languagedetect/docs/errorhandling.php new file mode 100644 index 000000000..b68e42476 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/errorhandling.php @@ -0,0 +1,15 @@ +detectSimple('Das ist ein kleiner Text'); + echo "Language is: $lang\n"; +} catch (Text_LanguageDetect_Exception $e) { + echo 'An error occured! Message: ' . $e . "\n"; +} +?> \ No newline at end of file diff --git a/vendor/pear/text_languagedetect/docs/example_clui.php b/vendor/pear/text_languagedetect/docs/example_clui.php new file mode 100644 index 000000000..210b0eec4 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/example_clui.php @@ -0,0 +1,35 @@ +getLanguages(); +sort($langs); +echo join(', ', $langs); + +echo "\ntotal ", count($langs), "\n\n"; + +while ($line = fgets($stdin)) { + $result = $l->detect($line, 4); + print_r($result); + $blocks = $l->detectUnicodeBlocks($line, true); + print_r($blocks); +} + +fclose($stdin); +unset($l); + +/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ + +?> diff --git a/vendor/pear/text_languagedetect/docs/example_web.php b/vendor/pear/text_languagedetect/docs/example_web.php new file mode 100644 index 000000000..bee8f51a4 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/example_web.php @@ -0,0 +1,72 @@ + + + +Text_LanguageDetect demonstration + + +

Text_LanguageDetect

+Supported languages:\n"; +$langs = $l->getLanguages(); +sort($langs); +foreach ($langs as $lang) { + echo ucfirst($lang), ', '; + $i++; +} + +echo "
total $i

"; + +?> +
+Enter text to identify language (at least a couple of sentences):
+ +
+ +
+utf8strlen($q); + if ($len < 20) { // this value picked somewhat arbitrarily + echo "Warning: string not very long ($len chars)
\n"; + } + + $result = $l->detectConfidence($q); + + if ($result == null) { + echo "Text_LanguageDetect cannot identify this piece of text.

\n"; + } else { + echo "Text_LanguageDetect thinks this text is written in {$result['language']} ({$result['similarity']}, {$result['confidence']})

\n"; + } + + $result = $l->detectUnicodeBlocks($q, false); + if (!empty($result)) { + arsort($result); + echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n

"; + } +} + +unset($l); + +/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ + +?> + diff --git a/vendor/pear/text_languagedetect/docs/iso.php b/vendor/pear/text_languagedetect/docs/iso.php new file mode 100644 index 000000000..547316313 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/iso.php @@ -0,0 +1,19 @@ +setNameMode(2); +echo $ld->detectSimple('Das ist ein kleiner Text') . "\n"; + +//will output the ISO 639-2 three-letter language code +// "deu" +$ld->setNameMode(3); +echo $ld->detectSimple('Das ist ein kleiner Text') . "\n"; +?> diff --git a/vendor/pear/text_languagedetect/docs/languages.php b/vendor/pear/text_languagedetect/docs/languages.php new file mode 100644 index 000000000..f6d022c22 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/languages.php @@ -0,0 +1,11 @@ +getLanguages() as $lang) { + echo $lang . "\n"; +} +?> diff --git a/vendor/pear/text_languagedetect/docs/simple.php b/vendor/pear/text_languagedetect/docs/simple.php new file mode 100644 index 000000000..0bfc11eb0 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/simple.php @@ -0,0 +1,10 @@ +detectSimple($text); +var_dump($result); +//output: german +?> diff --git a/vendor/pear/text_languagedetect/phpcs.xml b/vendor/pear/text_languagedetect/phpcs.xml new file mode 100644 index 000000000..9eeccf534 --- /dev/null +++ b/vendor/pear/text_languagedetect/phpcs.xml @@ -0,0 +1,10 @@ + + + + + + + + + + -- cgit v1.2.3