diff options
author | Mario <mario@mariovavti.com> | 2022-02-11 10:01:39 +0000 |
---|---|---|
committer | Mario <mario@mariovavti.com> | 2022-02-11 10:01:39 +0000 |
commit | bf30cfd8a451c104ba8e8d9d65d4514e60bb9b83 (patch) | |
tree | 15bdcdd4525b3304b73e4d17f5774b0ae2b293ee /vendor/voku/portable-ascii/src | |
parent | 139ffae3674e59307b46c67b0dcf77be9ec87b19 (diff) | |
download | volse-hubzilla-bf30cfd8a451c104ba8e8d9d65d4514e60bb9b83.tar.gz volse-hubzilla-bf30cfd8a451c104ba8e8d9d65d4514e60bb9b83.tar.bz2 volse-hubzilla-bf30cfd8a451c104ba8e8d9d65d4514e60bb9b83.zip |
more composer updates
Diffstat (limited to 'vendor/voku/portable-ascii/src')
-rw-r--r-- | vendor/voku/portable-ascii/src/voku/helper/ASCII.php | 92 |
1 files changed, 76 insertions, 16 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php index d4ec32ab1..aa676abda 100644 --- a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php +++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php @@ -200,7 +200,7 @@ final class ASCII * * @return string[] * - * @psalm-return array<string, string> + * @phpstan-return array<string, string> */ public static function getAllLanguages(): array { @@ -238,7 +238,7 @@ final class ASCII * * @return array * - * @psalm-return array<string, array<string , string>> + * @phpstan-return array<string, array<string , string>> */ public static function charsArray(bool $replace_extra_symbols = false): array { @@ -268,7 +268,7 @@ final class ASCII * @return array * <p>An array of replacements.</p> * - * @psalm-return array<string, array<int, string>> + * @phpstan-return array<string, array<int, string>> */ public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array { @@ -300,10 +300,7 @@ final class ASCII $CHARS_ARRAY[$cacheKey] = $return; - /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */ - /** @var array<string, array<int, string>> $return */ - $return = $return; - + /** @var array<string, array<int, string>> $return - hack for phpstan */ return $return; } @@ -332,7 +329,7 @@ final class ASCII * @return array * <p>An array of replacements.</p> * - * @psalm-return array{orig: string[], replace: string[]}|array<string, string> + * @phpstan-return array{orig: string[], replace: string[]}|array<string, string> */ public static function charsArrayWithOneLanguage( string $language = self::ENGLISH_LANGUAGE_CODE, @@ -428,7 +425,7 @@ final class ASCII * @return array * <p>An array of replacements.</p> * - * @psalm-return array{orig: string[], replace: string[]}|array<string, string> + * @phpstan-return array{orig: string[], replace: string[]}|array<string, string> */ public static function 
charsArrayWithSingleLanguageValues( bool $replace_extra_symbols = false, @@ -616,7 +613,7 @@ final class ASCII * @param bool $keepNonBreakingSpace [optional] <p>Set to true, to keep non-breaking-spaces.</p> * @param bool $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web) * bidirectional text chars.</p> - * @param bool $normalize_control_characters [optional] <p>Set to true, to convert LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p> + * @param bool $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p> * * @psalm-pure * @@ -645,16 +642,14 @@ final class ASCII "\x0d\x0c", // 'END OF LINE' "\xe2\x80\xa8", // 'LINE SEPARATOR' "\xe2\x80\xa9", // 'PARAGRAPH SEPARATOR' - "\x0c", // 'FORM FEED' - "\x0d", // 'CARRIAGE RETURN' - "\x0b", // 'VERTICAL TAB' + "\x0c", // 'FORM FEED' // "\f" + "\x0b", // 'VERTICAL TAB' // "\v" ], [ "\n", "\n", "\n", "\n", - "\n", "\t", ], $str @@ -738,6 +733,71 @@ final class ASCII } /** + * WARNING: This method will return broken characters and is only for special cases. + * + * Convert two UTF-8 encoded strings to single-byte strings suitable for + * functions that need the same string length after the conversion. + * + * The function simply uses (and updates) a tailored dynamic encoding + * (in/out map parameter) where non-ascii characters are remapped to + * the range [128-255] in order of appearance. + * + * @param string $str1 + * @param string $str2 + * + * @return string[] + * + * @phpstan-return array{0: string, 1: string} + */ + public static function to_ascii_remap(string $str1, string $str2): array + { + $charMap = []; + $str1 = self::to_ascii_remap_intern($str1, $charMap); + $str2 = self::to_ascii_remap_intern($str2, $charMap); + + return [$str1, $str2]; + } + + /** + * WARNING: This method will return broken characters and is only for special cases. 
+ * + * Convert a UTF-8 encoded string to a single-byte string suitable for + * functions that need the same string length after the conversion. + * + * The function simply uses (and updates) a tailored dynamic encoding + * (in/out map parameter) where non-ascii characters are remapped to + * the range [128-255] in order of appearance. + * + * Thus, it supports up to 128 different multibyte code points max over + * the whole set of strings sharing this encoding. + * + * Source: https://github.com/KEINOS/mb_levenshtein + * + * @param string $str UTF-8 string to be converted to extended ASCII. + * @return string Mapped broken string. + */ + private static function to_ascii_remap_intern(string $str, array &$map): string + { + // find all utf-8 characters + $matches = []; + if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) { + return $str; // plain ascii string + } + + // update the encoding map with the characters not already met + $mapCount = \count($map); + foreach ($matches[0] as $mbc) { + if (!isset($map[$mbc])) { + $map[$mbc] = \chr(128 + $mapCount); + $mapCount++; + } + } + + // finally remap non-ascii characters + return \strtr($str, $map); + } + + /** * Returns an ASCII version of the string. A set of non-ASCII characters are * replaced with their closest ASCII counterparts, and the rest are removed * by default. The language or locale of the source string can be supplied @@ -807,7 +867,7 @@ final class ASCII $EXTRA_SYMBOLS_CACHE === null ) { $EXTRA_SYMBOLS_CACHE = []; - foreach (self::$ASCII_EXTRAS ?? [] as $extrasLanguageTmp => $extrasDataTmp) { + foreach (self::$ASCII_EXTRAS ?? [] as $extrasDataTmp) { foreach ($extrasDataTmp as $extrasDataKeyTmp => $extrasDataValueTmp) { $EXTRA_SYMBOLS_CACHE[$extrasDataKeyTmp] = $extrasDataKeyTmp; } @@ -933,7 +993,7 @@ final class ASCII } } - foreach ($matches[0] as $keyTmp => $char) { + foreach ($matches[0] as $char) { if ( !isset($charDone[$char]) && |