diff options
author | Mario Vavti <mario@mariovavti.com> | 2020-08-22 20:01:59 +0200 |
---|---|---|
committer | Mario Vavti <mario@mariovavti.com> | 2020-08-22 20:01:59 +0200 |
commit | 646dce7765b36afab5c1688c51ebab48a214864e (patch) | |
tree | 5f017403ba8902507a0f1203db4a8954f04b48fa /vendor/pear/text_languagedetect | |
parent | 88a68f96da303893d911f09c25088d4f8288b5fb (diff) | |
download | volse-hubzilla-646dce7765b36afab5c1688c51ebab48a214864e.tar.gz volse-hubzilla-646dce7765b36afab5c1688c51ebab48a214864e.tar.bz2 volse-hubzilla-646dce7765b36afab5c1688c51ebab48a214864e.zip |
composer update pear/text_languagedetect
Diffstat (limited to 'vendor/pear/text_languagedetect')
-rw-r--r-- | vendor/pear/text_languagedetect/.gitignore | 6 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/.travis.yml | 14 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/README.rst | 9 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/Text/LanguageDetect.php | 42 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/composer.json | 2 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/confidence.php | 18 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/errorhandling.php | 15 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/example_clui.php | 35 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/example_web.php | 72 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/iso.php | 19 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/languages.php | 11 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/docs/simple.php | 10 | ||||
-rw-r--r-- | vendor/pear/text_languagedetect/phpcs.xml | 10 |
13 files changed, 238 insertions, 25 deletions
diff --git a/vendor/pear/text_languagedetect/.gitignore b/vendor/pear/text_languagedetect/.gitignore new file mode 100644 index 000000000..d83ad3973 --- /dev/null +++ b/vendor/pear/text_languagedetect/.gitignore @@ -0,0 +1,6 @@ +# composer related +composer.lock +composer.phar +vendor +/README.html +/dist diff --git a/vendor/pear/text_languagedetect/.travis.yml b/vendor/pear/text_languagedetect/.travis.yml new file mode 100644 index 000000000..7cb79a61f --- /dev/null +++ b/vendor/pear/text_languagedetect/.travis.yml @@ -0,0 +1,14 @@ +language: php +sudo: false +php: + - 7.2 + - 7.3 + - 7.4 +install: + - pear install pear/PHP_CodeSniffer + - composer install + - phpenv rehash +script: + - composer validate + - ./vendor/bin/phpunit --coverage-text tests + - phpcs Text/ diff --git a/vendor/pear/text_languagedetect/README.rst b/vendor/pear/text_languagedetect/README.rst index 9381c7f7e..15fbd87bb 100644 --- a/vendor/pear/text_languagedetect/README.rst +++ b/vendor/pear/text_languagedetect/README.rst @@ -155,3 +155,12 @@ Unit test status .. image:: https://travis-ci.org/pear/Text_LanguageDetect.svg?branch=master :target: https://travis-ci.org/pear/Text_LanguageDetect + + +Notes +===== +Where are the data from? + + I don't recall where I got the original data set. + It's just the frequencies of 3-letter combinations in each supported language. + It could be generated from a few random wikipedia pages from each language. diff --git a/vendor/pear/text_languagedetect/Text/LanguageDetect.php b/vendor/pear/text_languagedetect/Text/LanguageDetect.php index 420faa941..850b1beaf 100644 --- a/vendor/pear/text_languagedetect/Text/LanguageDetect.php +++ b/vendor/pear/text_languagedetect/Text/LanguageDetect.php @@ -12,9 +12,9 @@ * @link http://pear.php.net/package/Text_LanguageDetect/ */ -//require_once 'Text/LanguageDetect/Exception.php'; -//require_once 'Text/LanguageDetect/Parser.php'; -//require_once 'Text/LanguageDetect/ISO639.php'; +require_once 'Text/LanguageDetect/Exception.php'; +require_once 'Text/LanguageDetect/Parser.php'; +require_once 'Text/LanguageDetect/ISO639.php'; /** * Detects the language of a given piece of text. @@ -189,7 +189,7 @@ class Text_LanguageDetect */ protected function _get_data_loc($fname) { - if ($fname{0} == '/' || $fname{0} == '.') { + if ($fname[0] == '/' || $fname[0] == '.') { // if filename starts with a slash, assume it's an absolute pathname // and skip whatever is in $this->_data_dir return $fname; @@ -247,12 +247,6 @@ class Text_LanguageDetect protected function _checkTrigram($trigram) { if (!is_array($trigram)) { - if (ini_get('magic_quotes_runtime')) { - throw new Text_LanguageDetect_Exception( - 'Error loading database. Try turning magic_quotes_runtime off.', - Text_LanguageDetect_Exception::MAGIC_QUOTES - ); - } throw new Text_LanguageDetect_Exception( 'Language database is not an array.', Text_LanguageDetect_Exception::DB_NOT_ARRAY @@ -1470,31 +1464,31 @@ class Text_LanguageDetect case 1: // normal ASCII-7 byte // 0xxxxxxx --> 0xxxxxxx - return ord($char{0}); + return ord($char[0]); case 2: // 2 byte unicode // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx - $z = (ord($char{0}) & 0x000001F) << 6; - $x = (ord($char{1}) & 0x0000003F); + $z = (ord($char[0]) & 0x000001F) << 6; + $x = (ord($char[1]) & 0x0000003F); return ($z | $x); case 3: // 3 byte unicode // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx - $z = (ord($char{0}) & 0x0000000F) << 12; - $x1 = (ord($char{1}) & 0x0000003F) << 6; - $x2 = (ord($char{2}) & 0x0000003F); + $z = (ord($char[0]) & 0x0000000F) << 12; + $x1 = (ord($char[1]) & 0x0000003F) << 6; + $x2 = (ord($char[2]) & 0x0000003F); return ($z | $x1 | $x2); case 4: // 4 byte unicode // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx --> // 000zzzzz xxxxxxxx xxxxxxxx - $z1 = (ord($char{0}) & 0x00000007) << 18; - $z2 = (ord($char{1}) & 0x0000003F) << 12; - $x1 = (ord($char{2}) & 0x0000003F) << 6; - $x2 = (ord($char{3}) & 0x0000003F); + $z1 = (ord($char[0]) & 0x00000007) << 18; + $z2 = (ord($char[1]) & 0x0000003F) << 12; + $x1 = (ord($char[2]) & 0x0000003F) << 6; + $x2 = (ord($char[3]) & 0x0000003F); return ($z1 | $z2 | $x1 | $x2); } } @@ -1514,7 +1508,7 @@ class Text_LanguageDetect */ protected static function _next_char($str, &$counter, $special_convert = false) { - $char = $str{$counter++}; + $char = $str[$counter++]; $ord = ord($char); // for a description of the utf8 system see @@ -1538,7 +1532,7 @@ class Text_LanguageDetect } elseif ($ord >> 5 == 6) { // two-byte char // multi-byte chars - $nextchar = $str{$counter++}; // get next byte + $nextchar = $str[$counter++]; // get next byte // lower-casing of non-ascii characters is still incomplete @@ -1580,12 +1574,12 @@ class Text_LanguageDetect } elseif ($ord >> 4 == 14) { // three-byte char // tag on next 2 bytes - return $char . $str{$counter++} . $str{$counter++}; + return $char . $str[$counter++] . $str[$counter++]; } elseif ($ord >> 3 == 30) { // four-byte char // tag on next 3 bytes - return $char . $str{$counter++} . $str{$counter++} . $str{$counter++}; + return $char . $str[$counter++] . $str[$counter++] . $str[$counter++]; } else { // error? diff --git a/vendor/pear/text_languagedetect/composer.json b/vendor/pear/text_languagedetect/composer.json index fc94c6506..a65a17d40 100644 --- a/vendor/pear/text_languagedetect/composer.json +++ b/vendor/pear/text_languagedetect/composer.json @@ -27,6 +27,6 @@ "ext-mbstring": "May require the mbstring PHP extension" }, "require-dev": { - "phpunit/phpunit": "*" + "phpunit/phpunit": "8.*|9.*" } } diff --git a/vendor/pear/text_languagedetect/docs/confidence.php b/vendor/pear/text_languagedetect/docs/confidence.php new file mode 100644 index 000000000..5be0fb9b6 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/confidence.php @@ -0,0 +1,18 @@ +<?php +require_once 'Text/LanguageDetect.php'; + +$text = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?'; + +$ld = new Text_LanguageDetect(); +//3 most probable languages +$results = $ld->detect($text, 3); + +foreach ($results as $language => $confidence) { + echo $language . ': ' . number_format($confidence, 2) . "\n"; +} + +//output: +//german: 0.35 +//dutch: 0.25 +//swedish: 0.20 +?>
\ No newline at end of file diff --git a/vendor/pear/text_languagedetect/docs/errorhandling.php b/vendor/pear/text_languagedetect/docs/errorhandling.php new file mode 100644 index 000000000..b68e42476 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/errorhandling.php @@ -0,0 +1,15 @@ +<?php +/** + * How to handle errors + */ +require_once 'Text/LanguageDetect.php'; +require_once 'Text/LanguageDetect/Exception.php'; + +try { + $ld = new Text_LanguageDetect(); + $lang = $ld->detectSimple('Das ist ein kleiner Text'); + echo "Language is: $lang\n"; +} catch (Text_LanguageDetect_Exception $e) { + echo 'An error occured! Message: ' . $e . "\n"; +} +?>
\ No newline at end of file diff --git a/vendor/pear/text_languagedetect/docs/example_clui.php b/vendor/pear/text_languagedetect/docs/example_clui.php new file mode 100644 index 000000000..210b0eec4 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/example_clui.php @@ -0,0 +1,35 @@ +<?php + +/** + * example usage (CLI) + * + * @package Text_LanguageDetect + * @version CVS: $Id$ + */ + +require_once 'Text/LanguageDetect.php'; + +$l = new Text_LanguageDetect; + +$stdin = fopen('php://stdin', 'r'); + +echo "Supported languages:\n"; +$langs = $l->getLanguages(); +sort($langs); +echo join(', ', $langs); + +echo "\ntotal ", count($langs), "\n\n"; + +while ($line = fgets($stdin)) { + $result = $l->detect($line, 4); + print_r($result); + $blocks = $l->detectUnicodeBlocks($line, true); + print_r($blocks); +} + +fclose($stdin); +unset($l); + +/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ + +?> diff --git a/vendor/pear/text_languagedetect/docs/example_web.php b/vendor/pear/text_languagedetect/docs/example_web.php new file mode 100644 index 000000000..bee8f51a4 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/example_web.php @@ -0,0 +1,72 @@ +<?php + +/** + * example usage (web) + * + * @package Text_LanguageDetect + * @version CVS: $Id$ + */ + +// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded +header('Content-type: text/html; charset=utf-8', true); + +require_once 'Text/LanguageDetect.php'; + +$l = new Text_LanguageDetect; +if (isset($_REQUEST['q'])) { + $q = stripslashes($_REQUEST['q']); +} + +?> +<html> +<head> +<title>Text_LanguageDetect demonstration</title> +</head> +<body> +<h2>Text_LanguageDetect</h2> +<? +echo "<small>Supported languages:\n"; +$langs = $l->getLanguages(); +sort($langs); +foreach ($langs as $lang) { + echo ucfirst($lang), ', '; + $i++; +} + +echo "<br />total $i</small><br /><br />"; + +?> +<form method="post"> +Enter text to identify language (at least a couple of sentences):<br /> +<textarea name="q" wrap="virtual" cols="80" rows="8"><?= $q ?></textarea> +<br /> +<input type="submit" value="Submit" /> +</form> +<? +if (isset($q) && strlen($q)) { + $len = $l->utf8strlen($q); + if ($len < 20) { // this value picked somewhat arbitrarily + echo "Warning: string not very long ($len chars)<br />\n"; + } + + $result = $l->detectConfidence($q); + + if ($result == null) { + echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n"; + } else { + echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n"; + } + + $result = $l->detectUnicodeBlocks($q, false); + if (!empty($result)) { + arsort($result); + echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />"; + } +} + +unset($l); + +/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ + +?> +</body></html> diff --git a/vendor/pear/text_languagedetect/docs/iso.php b/vendor/pear/text_languagedetect/docs/iso.php new file mode 100644 index 000000000..547316313 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/iso.php @@ -0,0 +1,19 @@ +<?php +/** + * Demonstrates how to use ISO language codes. + * + * The "name mode" changes the way languages are accepted and returned. + */ +require_once 'Text/LanguageDetect.php'; +$ld = new Text_LanguageDetect(); + +//will output the ISO 639-1 two-letter language code +// "de" +$ld->setNameMode(2); +echo $ld->detectSimple('Das ist ein kleiner Text') . "\n"; + +//will output the ISO 639-2 three-letter language code +// "deu" +$ld->setNameMode(3); +echo $ld->detectSimple('Das ist ein kleiner Text') . "\n"; +?> diff --git a/vendor/pear/text_languagedetect/docs/languages.php b/vendor/pear/text_languagedetect/docs/languages.php new file mode 100644 index 000000000..f6d022c22 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/languages.php @@ -0,0 +1,11 @@ +<?php +/** + * List all supported languages + */ +require_once 'Text/LanguageDetect.php'; +$ld = new Text_LanguageDetect(); + +foreach ($ld->getLanguages() as $lang) { + echo $lang . "\n"; +} +?> diff --git a/vendor/pear/text_languagedetect/docs/simple.php b/vendor/pear/text_languagedetect/docs/simple.php new file mode 100644 index 000000000..0bfc11eb0 --- /dev/null +++ b/vendor/pear/text_languagedetect/docs/simple.php @@ -0,0 +1,10 @@ +<?php +require_once 'Text/LanguageDetect.php'; + +$text = 'Was wäre, wenn ich Ihnen das jetzt sagen würde?'; + +$ld = new Text_LanguageDetect(); +$result = $ld->detectSimple($text); +var_dump($result); +//output: german +?> diff --git a/vendor/pear/text_languagedetect/phpcs.xml b/vendor/pear/text_languagedetect/phpcs.xml new file mode 100644 index 000000000..9eeccf534 --- /dev/null +++ b/vendor/pear/text_languagedetect/phpcs.xml @@ -0,0 +1,10 @@ +<?xml version="1.0"?> +<ruleset name="PEAR-textlanguagedetect"> + <rule ref="PEAR"> + <!-- we keep the old php4-style variable names for now --> + <exclude name="PEAR.NamingConventions.ValidFunctionName.PublicUnderscore"/> + <exclude name="PEAR.NamingConventions.ValidVariableName.PublicUnderscore"/> + <!-- we keep the method names for BC reasons --> + <exclude name="PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps"/> + </rule> +</ruleset> |