6 files changed, 501 insertions, 0 deletions
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/Language.php b/vendor/patrickschur/language-detection/src/LanguageDetection/Language.php
new file mode 100644
index 000000000..c369a3367
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/Language.php
@@ -0,0 +1,102 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection;
+
+/**
+ * Class Language
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+class Language extends NgramParser
+{
+    /**
+     * Lookup tables per language code: n-gram => position of that n-gram in the language profile.
+     *
+     * @var array<string, array<string, int>>
+     */
+    protected $tokens = [];
+
+    /**
+     * Loads all language files
+     *
+     * Profiles are the *.php files one directory level below $dirname. Each file is expected to
+     * return an array [language code => list of n-grams]; the lists are flipped into lookup tables.
+     *
+     * @param array $lang List of ISO 639-1 codes, that should be used in the detection phase (empty = all)
+     * @param string $dirname Name of the directory where the translations files are located
+     * @throws \InvalidArgumentException If $dirname is given but is not a readable directory
+     */
+    public function __construct(array $lang = [], string $dirname = '')
+    {
+        if (empty($dirname)) {
+            // No directory given: use the resources directory two levels above this file's directory.
+            $dirname = __DIR__ . '/../../resources/*/*.php';
+        } elseif (!\is_dir($dirname) || !\is_readable($dirname)) {
+            throw new \InvalidArgumentException('Provided directory could not be found or is not readable');
+        } else {
+            $dirname = \rtrim($dirname, '/') . '/*/*.php';
+        }
+
+        $isEmpty = empty($lang);
+        $tokens = [];
+
+        // glob() returns false on error; fall back to an empty list so foreach never receives a bool.
+        foreach (\glob($dirname) ?: [] as $file) {
+            // Strict match: on PHP 7 a loose comparison lets a non-string entry such as 0 match any name.
+            if ($isEmpty || \in_array(\basename($file, '.php'), $lang, true)) {
+                // Array union: the first profile loaded for a language code wins.
+                $tokens += require $file;
+            }
+        }
+
+        // Dedicated loop variable names so the $lang parameter is not overwritten.
+        foreach ($tokens as $code => $ngrams) {
+            $this->tokens[$code] = \array_flip($ngrams);
+        }
+    }
+
+    /**
+     * Detects the language from a given text string
+     *
+     * The text is lower-cased and reduced to its ranked n-grams. For every loaded language the rank
+     * differences between sample and profile are summed; n-grams missing from the profile cost
+     * $this->maxNgrams each.
+     *
+     * @param string $str Text to classify
+     * @return LanguageResult Scores keyed by language code, highest first; empty if $str yields no n-grams
+     */
+    public function detect(string $str): LanguageResult
+    {
+        $str = \mb_strtolower($str);
+
+        $samples = $this->getNgrams($str);
+
+        $result = [];
+
+        if (\count($samples) > 0) {
+            foreach ($this->tokens as $lang => $value) {
+                $index = $sum = 0;
+
+                foreach ($samples as $v) {
+                    // Known n-gram: absolute distance between its position in the sample and its
+                    // position in the profile. Unknown n-gram: the full penalty.
+                    $sum += isset($value[$v]) ? \abs($index - $value[$v]) : $this->maxNgrams;
+                    ++$index;
+                }
+
+                // A sum of 0 (identical ranking) yields the top score of 1.
+                $result[$lang] = 1 - ($sum / ($this->maxNgrams * $index));
+            }
+
+            // Best match first, keeping the language codes as keys.
+            \arsort($result, SORT_NUMERIC);
+        }
+
+        return new LanguageResult($result);
+    }
+}
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/LanguageResult.php b/vendor/patrickschur/language-detection/src/LanguageDetection/LanguageResult.php
new file mode 100644
index 000000000..5b89ff44e
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/LanguageResult.php
@@ -0,0 +1,149 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection;
+
+/**
+ * Class LanguageResult
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+class LanguageResult implements \JsonSerializable, \IteratorAggregate, \ArrayAccess
+{
+    const THRESHOLD = .025;
+
+    /**
+     * @var array Scores keyed by language code, in the order they were passed to the constructor
+     */
+    private $result = [];
+
+    /**
+     * LanguageResult constructor.
+     * @param array $result Map of language code to score; Language::detect() passes it sorted best-first
+     */
+    public function __construct(array $result = [])
+    {
+        $this->result = $result;
+    }
+
+    /**
+     * @param mixed $offset Language code to look up
+     * @return bool True if a non-null entry is stored under $offset
+     */
+    public function offsetExists($offset): bool
+    {
+        return isset($this->result[$offset]);
+    }
+
+    /**
+     * @param mixed $offset Language code to look up
+     * @return float|null Score stored under $offset, or null if there is none
+     */
+    public function offsetGet($offset): ?float
+    {
+        return $this->result[$offset] ?? null;
+    }
+
+    /**
+     * @param mixed $offset Key to write to; null appends $value under the next integer key
+     * @param mixed $value Score to store
+     * @return void
+     */
+    public function offsetSet($offset, $value): void
+    {
+        if (null === $offset) {
+            $this->result[] = $value;
+        } else {
+            $this->result[$offset] = $value;
+        }
+    }
+
+    /**
+     * @param mixed $offset Key of the entry to remove
+     */
+    public function offsetUnset($offset): void
+    {
+        unset($this->result[$offset]);
+    }
+
+    /**
+     * @return array The stored result array; this is what json_encode() serializes
+     */
+    public function jsonSerialize(): array
+    {
+        return $this->result;
+    }
+
+    /**
+     * @return string Key at the result array's internal pointer, or an empty string for an empty result
+     */
+    public function __toString(): string
+    {
+        return (string) \key($this->result);
+    }
+
+    /**
+     * @param \string[] ...$whitelist Language codes to keep
+     * @return LanguageResult New result holding only the listed codes, in their stored order
+     */
+    public function whitelist(string ...$whitelist): LanguageResult
+    {
+        return new LanguageResult(\array_intersect_key($this->result, \array_flip($whitelist)));
+    }
+
+    /**
+     * @param \string[] ...$blacklist Language codes to drop
+     * @return LanguageResult New result without the listed codes, in their stored order
+     */
+    public function blacklist(string ...$blacklist): LanguageResult
+    {
+        return new LanguageResult(\array_diff_key($this->result, \array_flip($blacklist)));
+    }
+
+    /**
+     * @return array The stored result array
+     */
+    public function close(): array
+    {
+        return $this->result;
+    }
+
+    /**
+     * @return LanguageResult Entries whose score is at most THRESHOLD below the first entry's score
+     */
+    public function bestResults(): LanguageResult
+    {
+        if (!\count($this->result)) {
+            return new LanguageResult;
+        }
+
+        // The first stored entry is the reference score (detect() stores results best-first).
+        $first = \array_values($this->result)[0];
+
+        return new LanguageResult(\array_filter($this->result, static function ($value) use ($first) {
+            return ($first - $value) <= self::THRESHOLD;
+        }));
+    }
+
+    /**
+     * @return \ArrayIterator Iterator over the stored result array
+     */
+    public function getIterator(): \ArrayIterator
+    {
+        return new \ArrayIterator($this->result);
+    }
+
+    /**
+     * @param int $offset Position of the first entry to keep (array_slice() semantics)
+     * @param int|null $length Number of entries to keep; null keeps everything from $offset on
+     * @return LanguageResult New result holding the selected slice
+     */
+    public function limit(int $offset, ?int $length = null): LanguageResult
+    {
+        return new LanguageResult(\array_slice($this->result, $offset, $length));
+    }
+}
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/NgramParser.php b/vendor/patrickschur/language-detection/src/LanguageDetection/NgramParser.php
new file mode 100644
index 000000000..8b69241eb
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/NgramParser.php
@@ -0,0 +1,153 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection;
+
+use LanguageDetection\Tokenizer\TokenizerInterface;
+use LanguageDetection\Tokenizer\WhitespaceTokenizer;
+
+/**
+ * Class NgramParser
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+abstract class NgramParser
+{
+    /**
+     * @var int Length of the shortest n-gram that getNgrams() extracts
+     */
+    protected $minLength = 1;
+
+    /**
+     * @var int Length of the longest n-gram that getNgrams() extracts
+     */
+    protected $maxLength = 3;
+
+    /**
+     * @var int Upper bound on the number of n-grams returned by getNgrams()
+     */
+    protected $maxNgrams = 310;
+
+    /**
+     * @var TokenizerInterface|null Word splitter; a WhitespaceTokenizer is created on first use if unset
+     */
+    protected $tokenizer = null;
+
+    /**
+     * Sets the length of the shortest n-gram.
+     *
+     * @param int $minLength Must be positive and strictly below the current maximum length
+     * @throws \LengthException If $minLength is out of that range
+     */
+    public function setMinLength(int $minLength)
+    {
+        if (!($minLength > 0 && $minLength < $this->maxLength)) {
+            throw new \LengthException('$minLength must be greater than zero and less than $this->maxLength.');
+        }
+
+        $this->minLength = $minLength;
+    }
+
+    /**
+     * Sets the length of the longest n-gram.
+     *
+     * @param int $maxLength Must be strictly above the current minimum length
+     * @throws \LengthException If $maxLength is not greater than the minimum length
+     */
+    public function setMaxLength(int $maxLength)
+    {
+        if (!($maxLength > $this->minLength)) {
+            throw new \LengthException('$maxLength must be greater than $this->minLength.');
+        }
+
+        $this->maxLength = $maxLength;
+    }
+
+    /**
+     * Sets how many n-grams getNgrams() returns at most.
+     *
+     * @param int $maxNgrams Must be positive
+     * @throws \LengthException If $maxNgrams is zero or negative
+     */
+    public function setMaxNgrams(int $maxNgrams)
+    {
+        if ($maxNgrams < 1) {
+            throw new \LengthException('$maxNgrams must be greater than zero.');
+        }
+
+        $this->maxNgrams = $maxNgrams;
+    }
+
+    /**
+     * Sets the tokenizer
+     *
+     * @param TokenizerInterface $tokenizer Replaces the tokenizer used by getNgrams()
+     */
+    public function setTokenizer(TokenizerInterface $tokenizer)
+    {
+        $this->tokenizer = $tokenizer;
+    }
+
+    /**
+     * Splits a text with the configured tokenizer, creating a WhitespaceTokenizer if none is set.
+     *
+     * @param string $str
+     * @return array
+     */
+    private function tokenize(string $str)
+    {
+        $this->tokenizer = $this->tokenizer ?? new WhitespaceTokenizer();
+
+        return $this->tokenizer->tokenize($str);
+    }
+
+    /**
+     * Extracts the most frequent n-grams of a text.
+     *
+     * Every token from the tokenizer is cut into all substrings of length $minLength..$maxLength.
+     * Counts become relative frequencies within each length, then all lengths are merged and sorted.
+     *
+     * @param string $str
+     * @return array N-grams ordered by descending relative frequency, at most $maxNgrams entries
+     */
+    protected function getNgrams(string $str): array
+    {
+        $tokens = [];
+
+        foreach ($this->tokenize($str) as $word) {
+            $l = \mb_strlen($word);
+
+            for ($i = $this->minLength; $i <= $this->maxLength; ++$i) {
+                for ($j = 0; $j <= $l - $i; ++$j) {
+                    $ngram = \mb_substr($word, $j, $i);
+                    $tokens[$i][$ngram] = ($tokens[$i][$ngram] ?? 0) + 1;
+                }
+            }
+        }
+
+        if (!\count($tokens)) {
+            return [];
+        }
+
+        // Turn the raw counts into each n-gram's share among the n-grams of the same length.
+        foreach ($tokens as $i => $token) {
+            $sum = \array_sum($token);
+
+            foreach ($token as $j => $value) {
+                $tokens[$i][$j] = $value / $sum;
+            }
+        }
+
+        $tokens = \array_merge(...$tokens);
+        // The bare "_" entry is removed before ranking.
+        unset($tokens['_']);
+
+        \arsort($tokens, SORT_NUMERIC);
+
+        return \array_slice(\array_keys($tokens), 0, $this->maxNgrams);
+    }
+}
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/TokenizerInterface.php b/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/TokenizerInterface.php
new file mode 100644
index 000000000..f06074628
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/TokenizerInterface.php
@@ -0,0 +1,18 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection\Tokenizer;
+
+/**
+ * Interface TokenizerInterface
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+interface TokenizerInterface
+{
+    public function tokenize(string $str): array; // Splits $str into the tokens that NgramParser cuts into n-grams.
+}
+\ No newline at end of file
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php b/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php
new file mode 100644
index 000000000..68bb6013a
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php
@@ -0,0 +1,29 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection\Tokenizer;
+
+/**
+ * Class WhitespaceTokenizer
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+class WhitespaceTokenizer implements TokenizerInterface
+{
+    /**
+     * Splits a text on runs of non-letters and wraps every resulting word in underscores ("_word_").
+     *
+     * @param string $str Text to split; the lookbehind stops a delimiter run from ending in ', ` or U+2019
+     * @return array Wrapped words; an empty array when preg_split() fails (e.g. on invalid UTF-8)
+     */
+    public function tokenize(string $str): array
+    {
+        // preg_split() returns false on error; array_map() must only ever receive an array.
+        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, PREG_SPLIT_NO_EMPTY) ?: [];
+        return \array_map(static function ($word) { return "_{$word}_"; }, $words);
+    }
+}
+\ No newline at end of file
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/Trainer.php b/vendor/patrickschur/language-detection/src/LanguageDetection/Trainer.php
new file mode 100644
index 000000000..2bc5e6761
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/Trainer.php
@@ -0,0 +1,50 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection;
+
+/**
+ * Class Trainer
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+class Trainer extends NgramParser
+{
+    /**
+     * Generates language profiles for all language files
+     *
+     * Each *.txt file one level below $dirname gets a sibling *.php file returning [base name => n-grams].
+     *
+     * @param string $dirname Name of the directory where the translations files are located
+     * @return void
+     * @throws \InvalidArgumentException If $dirname is given but is not a readable directory
+     * @throws \RuntimeException If a training file cannot be read
+     */
+    public function learn(string $dirname = '')
+    {
+        if (empty($dirname)) {
+            $dirname = __DIR__ . '/../../resources/*/*.txt';
+        } elseif (!\is_dir($dirname) || !\is_readable($dirname)) {
+            throw new \InvalidArgumentException('Provided directory could not be found or is not readable');
+        } else {
+            $dirname = \rtrim($dirname, '/') . '/*/*.txt';
+        }
+
+        /** @var \SplFileInfo $txt */
+        foreach (new \GlobIterator($dirname) as $txt) {
+            $path = $txt->getPathname();
+            $content = \file_get_contents($path);
+            if (false === $content) {
+                // A false here would otherwise reach mb_strtolower() and surface as a TypeError.
+                throw new \RuntimeException(\sprintf('Training file "%s" could not be read', $path));
+            }
+            $profile = [$txt->getBasename('.txt') => $this->getNgrams(\mb_strtolower($content))];
+            $export = \sprintf("<?php\n\nreturn %s;\n", \var_export($profile, true));
+            \file_put_contents(\substr_replace($path, 'php', -3), $export);
+        }
+    }
+}