diff options
author | Mario <mario@mariovavti.com> | 2023-11-25 17:12:28 +0100 |
---|---|---|
committer | Mario <mario@mariovavti.com> | 2023-11-25 17:12:28 +0100 |
commit | 0fd8e02a884a2b040dca62ab5d9674db5f6a070b (patch) | |
tree | 586ee43f32f6f14368c09026f21dcd3244ea24b6 /vendor/patrickschur/language-detection/src/LanguageDetection/Language.php | |
parent | 82e704ec5b107823c09f1387e9091adee53a4c2d (diff) | |
parent | 55c4bfb67009c598f25b1a8189604bfffa73dfbb (diff) | |
download | volse-hubzilla-0fd8e02a884a2b040dca62ab5d9674db5f6a070b.tar.gz volse-hubzilla-0fd8e02a884a2b040dca62ab5d9674db5f6a070b.tar.bz2 volse-hubzilla-0fd8e02a884a2b040dca62ab5d9674db5f6a070b.zip |
Merge branch '8.8RC' (ref: 8.8)
Diffstat (limited to 'vendor/patrickschur/language-detection/src/LanguageDetection/Language.php')
-rw-r--r-- | vendor/patrickschur/language-detection/src/LanguageDetection/Language.php | 102 |
1 files changed, 102 insertions, 0 deletions
<?php

declare(strict_types = 1);

namespace LanguageDetection;

/**
 * Class Language
 *
 * Loads per-language n-gram lists from resource files and scores a text
 * against each of them to estimate which language it is written in.
 *
 * @copyright Patrick Schur
 * @license https://opensource.org/licenses/mit-license.html MIT
 * @author Patrick Schur <patrick_schur@outlook.de>
 * @package LanguageDetection
 */
class Language extends NgramParser
{
    /**
     * Lookup tables keyed by language code. Each table maps an n-gram to its
     * position in that language's n-gram list (the list is flipped on load).
     *
     * @var array<string, array<string, int>>
     */
    protected $tokens = [];

    /**
     * Loads all language files
     *
     * Every file matching "<dirname>/<any>/<name>.php" is considered; a file is
     * loaded when $lang is empty or when its base name (without ".php") is
     * listed in $lang. Each file is expected to return an array keyed by
     * language code whose values are n-gram lists (that is how the result is
     * consumed below).
     *
     * @param array $lang List of ISO 639-1 codes, that should be used in the detection phase
     * @param string $dirname Name of the directory where the translations files are located
     *
     * @throws \InvalidArgumentException If $dirname is given but is not a readable directory
     */
    public function __construct(array $lang = [], string $dirname = '')
    {
        // Compare against '' explicitly: empty() would also treat the valid
        // directory name '0' as "not provided".
        if ($dirname === '')
        {
            $pattern = __DIR__ . '/../../resources/*/*.php';
        }
        else if (!\is_dir($dirname) || !\is_readable($dirname))
        {
            throw new \InvalidArgumentException('Provided directory could not be found or is not readable');
        }
        else
        {
            $pattern = \rtrim($dirname, '/') . '/*/*.php';
        }

        $loadAll = empty($lang);
        $tokens = [];

        // glob() returns false on error; normalise to an empty list so the
        // loop below never iterates over a non-array.
        $files = \glob($pattern);

        if ($files === false)
        {
            $files = [];
        }

        foreach ($files as $file)
        {
            if ($loadAll || \in_array(\basename($file, '.php'), $lang, true))
            {
                // Array union: the first file to define a language code wins.
                $tokens += require $file;
            }
        }

        foreach ($tokens as $code => $ngrams)
        {
            // Flip "position => n-gram" into "n-gram => position" for O(1) lookups in detect().
            $this->tokens[$code] = \array_flip($ngrams);
        }
    }

    /**
     * Detects the language from a given text string
     *
     * For every loaded language the n-grams of the (lower-cased) text are
     * compared with the language's table: a known n-gram contributes the
     * absolute difference between its position in the text's n-gram sequence
     * and its position in the language table, an unknown n-gram contributes
     * $this->maxNgrams. The score is 1 - (sum / (maxNgrams * number of n-grams)),
     * and the scores are sorted in descending order.
     *
     * @param string $str
     * @return LanguageResult Scores keyed by language code; empty when the text yields no n-grams
     */
    public function detect(string $str): LanguageResult
    {
        $samples = $this->getNgrams(\mb_strtolower($str));

        $result = [];

        if (\count($samples) > 0)
        {
            foreach ($this->tokens as $code => $ranks)
            {
                $index = $sum = 0;

                foreach ($samples as $ngram)
                {
                    if (isset($ranks[$ngram]))
                    {
                        // Distance between the n-gram's position in the text and in the language table.
                        $sum += \abs($index - $ranks[$ngram]);
                    }
                    else
                    {
                        // Unknown n-gram: apply the maximum penalty.
                        $sum += $this->maxNgrams;
                    }

                    ++$index;
                }

                // $index equals the number of samples here and is > 0 because of the count() guard above.
                $result[$code] = 1 - ($sum / ($this->maxNgrams * $index));
            }

            \arsort($result, SORT_NUMERIC);
        }

        return new LanguageResult($result);
    }
}