aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php
diff options
context:
space:
mode:
author Harald Eilertsen <haraldei@anduin.net> 2023-12-16 16:05:52 +0100
committer Harald Eilertsen <haraldei@anduin.net> 2023-12-16 16:05:52 +0100
commit 19dd1fe86605bc85e8a0bf4efd6f195db258f60a (patch)
tree 5d5b1fac3ad1760189ea0daf1bf5639e4ba0d100 /vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php
parent 6a5f78543fb2f5329e54209253b8c0d0f1d7a91d (diff)
parent 69266cd6c65d228320dede32a343a9d3f3ea63df (diff)
download volse-hubzilla-19dd1fe86605bc85e8a0bf4efd6f195db258f60a.tar.gz
volse-hubzilla-19dd1fe86605bc85e8a0bf4efd6f195db258f60a.tar.bz2
volse-hubzilla-19dd1fe86605bc85e8a0bf4efd6f195db258f60a.zip
Merge branch 'dev' into tests/test-db-setup-wip
Diffstat (limited to 'vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php')
-rw-r--r-- vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php 29
1 files changed, 29 insertions, 0 deletions
diff --git a/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php b/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php
new file mode 100644
index 000000000..68bb6013a
--- /dev/null
+++ b/vendor/patrickschur/language-detection/src/LanguageDetection/Tokenizer/WhitespaceTokenizer.php
@@ -0,0 +1,29 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace LanguageDetection\Tokenizer;
+
+/**
+ * Class WhitespaceTokenizer
+ *
+ * Splits a string into word tokens and wraps each token in underscores.
+ *
+ * @copyright Patrick Schur
+ * @license https://opensource.org/licenses/mit-license.html MIT
+ * @author Patrick Schur <patrick_schur@outlook.de>
+ * @package LanguageDetection
+ */
+class WhitespaceTokenizer implements TokenizerInterface
+{
+    /**
+     * Splits the input into tokens and surrounds every token with underscores.
+     *
+     * A delimiter is a run of one or more characters that are not Unicode
+     * letters (\pL). The lookbehind forbids a delimiter from ending on an
+     * apostrophe (U+0027), a grave accent (U+0060) or a right single quotation
+     * mark (U+2019), so such a character directly followed by a letter stays
+     * inside the token (e.g. "l'homme" is not split). Empty pieces are dropped
+     * by PREG_SPLIT_NO_EMPTY.
+     *
+     * @param string $str Text to tokenize; the pattern uses the "u" modifier,
+     *                    so the text is expected to be valid UTF-8.
+     * @return array List of tokens, each in the form "_token_". Empty when the
+     *               input contains no letters or when it cannot be split
+     *               (e.g. malformed UTF-8).
+     */
+    public function tokenize(string $str): array
+    {
+        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, PREG_SPLIT_NO_EMPTY);
+
+        // preg_split() returns false on failure, for instance when $str is not
+        // valid UTF-8. Handing that false to array_map() would raise a
+        // TypeError, so treat an unsplittable input as having no tokens.
+        if (false === $words) {
+            return [];
+        }
+
+        return \array_map(function ($word) {
+            return "_{$word}_";
+        }, $words);
+    }
+}
\ No newline at end of file