diff options
author | Harald Eilertsen <haraldei@anduin.net> | 2023-12-16 16:05:52 +0100 |
---|---|---|
committer | Harald Eilertsen <haraldei@anduin.net> | 2023-12-16 16:05:52 +0100 |
commit | 19dd1fe86605bc85e8a0bf4efd6f195db258f60a (patch) | |
tree | 5d5b1fac3ad1760189ea0daf1bf5639e4ba0d100 /vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php | |
parent | 6a5f78543fb2f5329e54209253b8c0d0f1d7a91d (diff) | |
parent | 69266cd6c65d228320dede32a343a9d3f3ea63df (diff) | |
download | volse-hubzilla-19dd1fe86605bc85e8a0bf4efd6f195db258f60a.tar.gz volse-hubzilla-19dd1fe86605bc85e8a0bf4efd6f195db258f60a.tar.bz2 volse-hubzilla-19dd1fe86605bc85e8a0bf4efd6f195db258f60a.zip |
Merge branch 'dev' into tests/test-db-setup-wip
Diffstat (limited to 'vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php')
-rw-r--r-- | vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php | 29 |
1 files changed, 29 insertions, 0 deletions
<?php

declare(strict_types=1);

namespace LanguageDetection\Tokenizer;

/**
 * Class WhitespaceTokenizer
 *
 * Splits a text into words on runs of non-letter characters and wraps each
 * word in underscores.
 *
 * @copyright Patrick Schur
 * @license https://opensource.org/licenses/mit-license.html MIT
 * @author Patrick Schur <patrick_schur@outlook.de>
 * @package LanguageDetection
 */
class WhitespaceTokenizer implements TokenizerInterface
{
    /**
     * Breaks the input into word tokens, each surrounded by underscores.
     *
     * A delimiter is any run of characters that are not Unicode letters. The
     * negative look-behind forbids a delimiter from ending in an apostrophe
     * (U+0027), a backtick (U+0060) or a right single quotation mark
     * (U+2019), so a word such as "don't" is kept as one token. Empty pieces
     * are discarded.
     *
     * @param string $str Text to tokenize; must be valid UTF-8 because the
     *                    pattern is compiled with the "u" modifier.
     * @return array List of tokens of the form "_word_"; empty when the input
     *               contains no letters.
     * @throws \RuntimeException When preg_split() reports a failure, e.g. for
     *                           malformed UTF-8 input.
     */
    public function tokenize(string $str): array
    {
        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() yields false on a PCRE error. Handing that to
        // array_map() would raise an unrelated TypeError, so fail here with
        // a message that names the real cause.
        if ($words === false) {
            throw new \RuntimeException(\sprintf(
                'Unable to tokenize input: preg_split() failed with PCRE error code %d (is the input valid UTF-8?).',
                \preg_last_error()
            ));
        }

        return \array_map(
            static function (string $word): string {
                return "_{$word}_";
            },
            $words
        );
    }
}
\ No newline at end of file |