diff options
Diffstat (limited to 'vendor/voku/portable-ascii/src/voku/helper/ASCII.php')
-rw-r--r-- | vendor/voku/portable-ascii/src/voku/helper/ASCII.php | 165 |
1 files changed, 82 insertions, 83 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php index 7d8b66de7..aa676abda 100644 --- a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php +++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php @@ -5,22 +5,6 @@ declare(strict_types=1); namespace voku\helper; /** - * ## 🇷🇺 Русским гражданам - * В Украине сейчас идет война. Силами РФ наносятся удары по гражданской инфраструктуре в [Харькове][1], [Киеве][2], [Чернигове][3], [Сумах][4], [Ирпене][5] и десятках других городов. Гибнут люди - и гражданское население, и военные, в том числе российские призывники, которых бросили воевать. Чтобы лишить собственный народ доступа к информации, правительство РФ запретило называть войну войной, закрыло независимые СМИ и принимает сейчас ряд диктаторских законов. Эти законы призваны заткнуть рот всем, кто против войны. За обычный призыв к миру сейчас можно получить несколько лет тюрьмы. - * Не молчите! Молчание - знак вашего согласия с политикой российского правительства. - * **Вы можете сделать выбор НЕ МОЛЧАТЬ.** - * --- - * ## 🇺🇸 To people of Russia - * There is a war in Ukraine right now. The forces of the Russian Federation are attacking civilian infrastructure in [Kharkiv][1], [Kyiv][2], [Chernihiv][3], [Sumy][4], [Irpin][5] and dozens of other cities. People are dying – both civilians and military servicemen, including Russian conscripts who were thrown into the fighting. In order to deprive its own people of access to information, the government of the Russian Federation has forbidden calling a war a war, shut down independent media and is passing a number of dictatorial laws. These laws are meant to silence all those who are against war. You can be jailed for multiple years for simply calling for peace. - * Do not be silent! Silence is a sign that you accept the Russian government's policy. 
- * **You can choose NOT TO BE SILENT.** - * --- - * - [1] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/P7K2MSZDGFMIJPDD7CI2GIROJI.jpg "Kharkiv under attack" - * - [2] https://gdb.voanews.com/01bd0000-0aff-0242-fad0-08d9fc92c5b3_cx0_cy5_cw0_w1023_r1_s.jpg "Kyiv under attack" - * - [3] https://ichef.bbci.co.uk/news/976/cpsprodpb/163DD/production/_123510119_hi074310744.jpg "Chernihiv under attack" - * - [4] https://www.youtube.com/watch?v=8K-bkqKKf2A "Sumy under attack" - * - [5] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/K4MTMLEHTRKGFK3GSKAT4GR3NE.jpg "Irpin under attack" - * * @psalm-immutable */ final class ASCII @@ -288,7 +272,9 @@ final class ASCII */ public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array { - /** @var array<string, array<string, array<int, string>>> */ + /** + * @var array<string, array> + */ static $CHARS_ARRAY = []; $cacheKey = '' . $replace_extra_symbols; @@ -343,7 +329,6 @@ final class ASCII * @return array * <p>An array of replacements.</p> * - * @phpstan-param ASCII::*_LANGUAGE_CODE $language * @phpstan-return array{orig: string[], replace: string[]}|array<string, string> */ public static function charsArrayWithOneLanguage( @@ -354,7 +339,9 @@ final class ASCII $language = self::get_language($language); // init - /** @var array<string, array<string, array<string, string>|array{orig: string[], replace: string[]}>> */ + /** + * @var array<string, array> + */ static $CHARS_ARRAY = []; $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray; @@ -445,7 +432,9 @@ final class ASCII bool $asOrigReplaceArray = true ): array { // init - /** @var array<string, array<string, string>|array{orig: string[], replace: string[]}> */ + /** + * @var array<string,array> + */ static $CHARS_ARRAY = []; $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray; @@ -471,7 +460,6 @@ final class ASCII } } - /** @phpstan-ignore-next-line - ... error? 
*/ $CHARS_ARRAY[$cacheKey] = \array_merge([], ...$CHARS_ARRAY[$cacheKey]); if ($asOrigReplaceArray) { @@ -590,13 +578,19 @@ final class ASCII return ''; } - /** @var array{orig: string[], replace: string[]} */ + /** + * @var array{orig: string[], replace: string[]} + */ static $MSWORD_CACHE = ['orig' => [], 'replace' => []]; if (empty($MSWORD_CACHE['orig'])) { self::prepareAsciiMaps(); - /** @var array<string, string> */ + /** + * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here + * + * @var array<string, string> + */ $map = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? []; $MSWORD_CACHE = [ @@ -636,7 +630,9 @@ final class ASCII return ''; } - /** @var array<int,array<string,string>> */ + /** + * @var array<int,array<string,string>> + */ static $WHITESPACE_CACHE = []; $cacheKey = (int) $keepNonBreakingSpace; @@ -669,11 +665,13 @@ final class ASCII unset($WHITESPACE_CACHE[$cacheKey]["\xc2\xa0"]); } - $WHITESPACE_CACHE[$cacheKey] = array_keys($WHITESPACE_CACHE[$cacheKey]); + $WHITESPACE_CACHE[$cacheKey] = \array_keys($WHITESPACE_CACHE[$cacheKey]); } if (!$keepBidiUnicodeControls) { - /** @var array<int,string>|null */ + /** + * @var array<int,string>|null + */ static $BIDI_UNICODE_CONTROLS_CACHE = null; if ($BIDI_UNICODE_CONTROLS_CACHE === null) { @@ -761,6 +759,45 @@ final class ASCII } /** + * WARNING: This method will return broken characters and is only for special cases. + * + * Convert a UTF-8 encoded string to a single-byte string suitable for + * functions that need the same string length after the conversion. + * + * The function simply uses (and updates) a tailored dynamic encoding + * (in/out map parameter) where non-ascii characters are remapped to + * the range [128-255] in order of appearance. + * + * Thus, it supports up to 128 different multibyte code points max over + * the whole set of strings sharing this encoding. 
+ * + * Source: https://github.com/KEINOS/mb_levenshtein + * + * @param string $str UTF-8 string to be converted to extended ASCII. + * @return string Mapped borken string. + */ + private static function to_ascii_remap_intern(string $str, array &$map): string + { + // find all utf-8 characters + $matches = []; + if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) { + return $str; // plain ascii string + } + + // update the encoding map with the characters not already met + $mapCount = \count($map); + foreach ($matches[0] as $mbc) { + if (!isset($map[$mbc])) { + $map[$mbc] = \chr(128 + $mapCount); + $mapCount++; + } + } + + // finally remap non-ascii characters + return \strtr($str, $map); + } + + /** * Returns an ASCII version of the string. A set of non-ASCII characters are * replaced with their closest ASCII counterparts, and the rest are removed * by default. The language or locale of the source string can be supplied @@ -789,8 +826,6 @@ final class ASCII * * @return string * <p>A string that contains only ASCII characters.</p> - * - * @phpstan-param ASCII::*_LANGUAGE_CODE $language */ public static function to_ascii( string $str, @@ -804,12 +839,13 @@ final class ASCII return ''; } - /** @phpstan-var ASCII::*_LANGUAGE_CODE - hack for phpstan */ $language = self::get_language($language); static $EXTRA_SYMBOLS_CACHE = null; - /** @var array<string,array<string,string>> */ + /** + * @var array<string,array<string,string>> + */ static $REPLACE_HELPER_CACHE = []; $cacheKey = $language . '-' . $replace_extra_symbols; @@ -983,6 +1019,7 @@ final class ASCII } if ($use_transliterate) { + /** @noinspection ArgumentEqualsDefaultValueInspection */ $str = self::to_transliterate($str, null, false); } @@ -1024,9 +1061,9 @@ final class ASCII $str = (string) \preg_replace( [ - '/[^' . $fallback_char_escaped . '.\\-a-zA-Z\d\\s]/', // 1) remove un-needed chars - '/\s+/u', // 2) convert spaces to $fallback_char - '/[' . $fallback_char_escaped . 
']+/u', // 3) remove double $fallback_char's + '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars + '/[\\s]+/u', // 2) convert spaces to $fallback_char + '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's ], [ '', @@ -1061,8 +1098,6 @@ final class ASCII * * @return string * <p>A string that has been converted to an URL slug.</p> - * - * @phpstan-param ASCII::*_LANGUAGE_CODE $language */ public static function to_slugify( string $str, @@ -1143,13 +1178,19 @@ final class ASCII $unknown = '?', bool $strict = false ): string { - /** @var array<int,string>|null */ + /** + * @var array<int,string>|null + */ static $UTF8_TO_TRANSLIT = null; - /** null|\Transliterator */ + /** + * null|\Transliterator + */ static $TRANSLITERATOR = null; - /** @var bool|null */ + /** + * @var bool|null + */ static $SUPPORT_INTL = null; if ($str === '') { @@ -1184,7 +1225,9 @@ final class ASCII ) { if (!isset($TRANSLITERATOR)) { // INFO: see "*-Latin" rules via "transliterator_list_ids()" - /** @var \Transliterator */ + /** + * @var \Transliterator + */ $TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;'); } @@ -1334,50 +1377,6 @@ final class ASCII } /** - * WARNING: This method will return broken characters and is only for special cases. - * - * Convert a UTF-8 encoded string to a single-byte string suitable for - * functions that need the same string length after the conversion. - * - * The function simply uses (and updates) a tailored dynamic encoding - * (in/out map parameter) where non-ascii characters are remapped to - * the range [128-255] in order of appearance. - * - * Thus, it supports up to 128 different multibyte code points max over - * the whole set of strings sharing this encoding. 
- * - * Source: https://github.com/KEINOS/mb_levenshtein - * - * @param string $str <p>UTF-8 string to be converted to extended ASCII.</p> - * @param array $map <p>Internal-Map of code points to ASCII characters.</p> - * - * @return string - * <p>Mapped borken string.</p> - * - * @phpstan-param array<string, string> $map - */ - private static function to_ascii_remap_intern(string $str, array &$map): string - { - // find all utf-8 characters - $matches = []; - if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) { - return $str; // plain ascii string - } - - // update the encoding map with the characters not already met - $mapCount = \count($map); - foreach ($matches[0] as $mbc) { - if (!isset($map[$mbc])) { - $map[$mbc] = \chr(128 + $mapCount); - ++$mapCount; - } - } - - // finally, remap non-ascii characters - return \strtr($str, $map); - } - - /** * Get the language from a string. * * e.g.: de_at -> de_at |