aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/voku/portable-ascii/src/voku/helper/ASCII.php')
-rw-r--r--vendor/voku/portable-ascii/src/voku/helper/ASCII.php92
1 files changed, 76 insertions, 16 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
index d4ec32ab1..aa676abda 100644
--- a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
+++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
@@ -200,7 +200,7 @@ final class ASCII
*
* @return string[]
*
- * @psalm-return array<string, string>
+ * @phpstan-return array<string, string>
*/
public static function getAllLanguages(): array
{
@@ -238,7 +238,7 @@ final class ASCII
*
* @return array
*
- * @psalm-return array<string, array<string , string>>
+ * @phpstan-return array<string, array<string , string>>
*/
public static function charsArray(bool $replace_extra_symbols = false): array
{
@@ -268,7 +268,7 @@ final class ASCII
* @return array
* <p>An array of replacements.</p>
*
- * @psalm-return array<string, array<int, string>>
+ * @phpstan-return array<string, array<int, string>>
*/
public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array
{
@@ -300,10 +300,7 @@ final class ASCII
$CHARS_ARRAY[$cacheKey] = $return;
- /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
- /** @var array<string, array<int, string>> $return */
- $return = $return;
-
+ /** @var array<string, array<int, string>> $return - hack for phpstan */
return $return;
}
@@ -332,7 +329,7 @@ final class ASCII
* @return array
* <p>An array of replacements.</p>
*
- * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
+ * @phpstan-return array{orig: string[], replace: string[]}|array<string, string>
*/
public static function charsArrayWithOneLanguage(
string $language = self::ENGLISH_LANGUAGE_CODE,
@@ -428,7 +425,7 @@ final class ASCII
* @return array
* <p>An array of replacements.</p>
*
- * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
+ * @phpstan-return array{orig: string[], replace: string[]}|array<string, string>
*/
public static function charsArrayWithSingleLanguageValues(
bool $replace_extra_symbols = false,
@@ -616,7 +613,7 @@ final class ASCII
* @param bool $keepNonBreakingSpace [optional] <p>Set to true, to keep non-breaking-spaces.</p>
* @param bool $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
* bidirectional text chars.</p>
- * @param bool $normalize_control_characters [optional] <p>Set to true, to convert LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
+ * @param bool $normalize_control_characters [optional] <p>Set to true, to replace e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
*
* @psalm-pure
*
@@ -645,16 +642,14 @@ final class ASCII
"\x0d\x0c", // 'END OF LINE'
"\xe2\x80\xa8", // 'LINE SEPARATOR'
"\xe2\x80\xa9", // 'PARAGRAPH SEPARATOR'
- "\x0c", // 'FORM FEED'
- "\x0d", // 'CARRIAGE RETURN'
- "\x0b", // 'VERTICAL TAB'
+ "\x0c", // 'FORM FEED' // "\f"
+ "\x0b", // 'VERTICAL TAB' // "\v"
],
[
"\n",
"\n",
"\n",
"\n",
- "\n",
"\t",
],
$str
@@ -738,6 +733,71 @@ final class ASCII
}
/**
+ * WARNING: This method will return broken characters and is only for special cases.
+ *
+ * Convert two UTF-8 encoded strings to single-byte strings suitable for
+ * functions that need the same string length after the conversion.
+ *
+ * The function builds a tailored dynamic encoding (one map shared by
+ * both strings) where non-ascii characters are remapped to
+ * the range [128-255] in order of appearance.
+ *
+ * @param string $str1
+ * @param string $str2
+ *
+ * @return string[]
+ *
+ * @phpstan-return array{0: string, 1: string}
+ */
+ public static function to_ascii_remap(string $str1, string $str2): array
+ {
+ $charMap = []; // one map shared by both calls, so a given character gets the same byte in both strings
+ $str1 = self::to_ascii_remap_intern($str1, $charMap); // map is passed by reference and filled here
+ $str2 = self::to_ascii_remap_intern($str2, $charMap); // reuses the entries from $str1 and appends new ones
+
+ return [$str1, $str2]; // same order as the arguments
+ }
+
+ /**
+ * WARNING: This method will return broken characters and is only for special cases.
+ *
+ * Convert a UTF-8 encoded string to a single-byte string suitable for
+ * functions that need the same string length after the conversion.
+ *
+ * The function simply uses (and updates) a tailored dynamic encoding
+ * (in/out map parameter) where non-ascii characters are remapped to
+ * the range [128-255] in order of appearance.
+ *
+ * Thus, it supports up to 128 different multibyte code points max over
+ * the whole set of strings sharing this encoding.
+ *
+ * Source: https://github.com/KEINOS/mb_levenshtein
+ *
+ * @param string $str UTF-8 string to be converted to extended ASCII.
+ * @return string Mapped broken string.
+ */
+ private static function to_ascii_remap_intern(string $str, array &$map): string
+ {
+ // collect every multi-byte sequence: one lead byte (0xC0-0xF7) followed by one or more continuation bytes (0x80-0xBF)
+ $matches = [];
+ if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
+ return $str; // no multi-byte sequence matched: nothing to remap, input is returned unchanged
+ }
+
+ // register each sequence not seen before; $map (in/out) is keyed by the original sequence and holds its one-byte replacement
+ $mapCount = \count($map); // continue numbering after the entries added by earlier calls
+ foreach ($matches[0] as $mbc) {
+ if (!isset($map[$mbc])) {
+ $map[$mbc] = \chr(128 + $mapCount); // next unused byte value, counting up from 128
+ $mapCount++; // NOTE(review): nothing here caps the map at 128 entries - confirm callers stay within that limit
+ }
+ }
+
+ // finally replace every mapped sequence in $str with its single-byte stand-in
+ return \strtr($str, $map);
+ }
+
+ /**
* Returns an ASCII version of the string. A set of non-ASCII characters are
* replaced with their closest ASCII counterparts, and the rest are removed
* by default. The language or locale of the source string can be supplied
@@ -807,7 +867,7 @@ final class ASCII
$EXTRA_SYMBOLS_CACHE === null
) {
$EXTRA_SYMBOLS_CACHE = [];
- foreach (self::$ASCII_EXTRAS ?? [] as $extrasLanguageTmp => $extrasDataTmp) {
+ foreach (self::$ASCII_EXTRAS ?? [] as $extrasDataTmp) {
foreach ($extrasDataTmp as $extrasDataKeyTmp => $extrasDataValueTmp) {
$EXTRA_SYMBOLS_CACHE[$extrasDataKeyTmp] = $extrasDataKeyTmp;
}
@@ -933,7 +993,7 @@ final class ASCII
}
}
- foreach ($matches[0] as $keyTmp => $char) {
+ foreach ($matches[0] as $char) {
if (
!isset($charDone[$char])
&&