aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/voku/portable-ascii/src/voku/helper/ASCII.php')
-rw-r--r--vendor/voku/portable-ascii/src/voku/helper/ASCII.php165
1 files changed, 83 insertions, 82 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
index aa676abda..7d8b66de7 100644
--- a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
+++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
@@ -5,6 +5,22 @@ declare(strict_types=1);
namespace voku\helper;
/**
+ * ## 🇷🇺 Русским гражданам
+ * В Украине сейчас идет война. Силами РФ наносятся удары по гражданской инфраструктуре в [Харькове][1], [Киеве][2], [Чернигове][3], [Сумах][4], [Ирпене][5] и десятках других городов. Гибнут люди - и гражданское население, и военные, в том числе российские призывники, которых бросили воевать. Чтобы лишить собственный народ доступа к информации, правительство РФ запретило называть войну войной, закрыло независимые СМИ и принимает сейчас ряд диктаторских законов. Эти законы призваны заткнуть рот всем, кто против войны. За обычный призыв к миру сейчас можно получить несколько лет тюрьмы.
+ * Не молчите! Молчание - знак вашего согласия с политикой российского правительства.
+ * **Вы можете сделать выбор НЕ МОЛЧАТЬ.**
+ * ---
+ * ## 🇺🇸 To people of Russia
+ * There is a war in Ukraine right now. The forces of the Russian Federation are attacking civilian infrastructure in [Kharkiv][1], [Kyiv][2], [Chernihiv][3], [Sumy][4], [Irpin][5] and dozens of other cities. People are dying – both civilians and military servicemen, including Russian conscripts who were thrown into the fighting. In order to deprive its own people of access to information, the government of the Russian Federation has forbidden calling a war a war, shut down independent media and is passing a number of dictatorial laws. These laws are meant to silence all those who are against war. You can be jailed for multiple years for simply calling for peace.
+ * Do not be silent! Silence is a sign that you accept the Russian government's policy.
+ * **You can choose NOT TO BE SILENT.**
+ * ---
+ * - [1] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/P7K2MSZDGFMIJPDD7CI2GIROJI.jpg "Kharkiv under attack"
+ * - [2] https://gdb.voanews.com/01bd0000-0aff-0242-fad0-08d9fc92c5b3_cx0_cy5_cw0_w1023_r1_s.jpg "Kyiv under attack"
+ * - [3] https://ichef.bbci.co.uk/news/976/cpsprodpb/163DD/production/_123510119_hi074310744.jpg "Chernihiv under attack"
+ * - [4] https://www.youtube.com/watch?v=8K-bkqKKf2A "Sumy under attack"
+ * - [5] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/K4MTMLEHTRKGFK3GSKAT4GR3NE.jpg "Irpin under attack"
+ *
* @psalm-immutable
*/
final class ASCII
@@ -272,9 +288,7 @@ final class ASCII
*/
public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array
{
- /**
- * @var array<string, array>
- */
+ /** @var array<string, array<string, array<int, string>>> */
static $CHARS_ARRAY = [];
$cacheKey = '' . $replace_extra_symbols;
@@ -329,6 +343,7 @@ final class ASCII
* @return array
* <p>An array of replacements.</p>
*
+ * @phpstan-param ASCII::*_LANGUAGE_CODE $language
* @phpstan-return array{orig: string[], replace: string[]}|array<string, string>
*/
public static function charsArrayWithOneLanguage(
@@ -339,9 +354,7 @@ final class ASCII
$language = self::get_language($language);
// init
- /**
- * @var array<string, array>
- */
+ /** @var array<string, array<string, array<string, string>|array{orig: string[], replace: string[]}>> */
static $CHARS_ARRAY = [];
$cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
@@ -432,9 +445,7 @@ final class ASCII
bool $asOrigReplaceArray = true
): array {
// init
- /**
- * @var array<string,array>
- */
+ /** @var array<string, array<string, string>|array{orig: string[], replace: string[]}> */
static $CHARS_ARRAY = [];
$cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
@@ -460,6 +471,7 @@ final class ASCII
}
}
+ /** @phpstan-ignore-next-line - ... error? */
$CHARS_ARRAY[$cacheKey] = \array_merge([], ...$CHARS_ARRAY[$cacheKey]);
if ($asOrigReplaceArray) {
@@ -578,19 +590,13 @@ final class ASCII
return '';
}
- /**
- * @var array{orig: string[], replace: string[]}
- */
+ /** @var array{orig: string[], replace: string[]} */
static $MSWORD_CACHE = ['orig' => [], 'replace' => []];
if (empty($MSWORD_CACHE['orig'])) {
self::prepareAsciiMaps();
- /**
- * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here
- *
- * @var array<string, string>
- */
+ /** @var array<string, string> */
$map = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? [];
$MSWORD_CACHE = [
@@ -630,9 +636,7 @@ final class ASCII
return '';
}
- /**
- * @var array<int,array<string,string>>
- */
+ /** @var array<int,array<string,string>> */
static $WHITESPACE_CACHE = [];
$cacheKey = (int) $keepNonBreakingSpace;
@@ -665,13 +669,11 @@ final class ASCII
unset($WHITESPACE_CACHE[$cacheKey]["\xc2\xa0"]);
}
- $WHITESPACE_CACHE[$cacheKey] = \array_keys($WHITESPACE_CACHE[$cacheKey]);
+ $WHITESPACE_CACHE[$cacheKey] = array_keys($WHITESPACE_CACHE[$cacheKey]);
}
if (!$keepBidiUnicodeControls) {
- /**
- * @var array<int,string>|null
- */
+ /** @var array<int,string>|null */
static $BIDI_UNICODE_CONTROLS_CACHE = null;
if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
@@ -759,45 +761,6 @@ final class ASCII
}
/**
- * WARNING: This method will return broken characters and is only for special cases.
- *
- * Convert a UTF-8 encoded string to a single-byte string suitable for
- * functions that need the same string length after the conversion.
- *
- * The function simply uses (and updates) a tailored dynamic encoding
- * (in/out map parameter) where non-ascii characters are remapped to
- * the range [128-255] in order of appearance.
- *
- * Thus, it supports up to 128 different multibyte code points max over
- * the whole set of strings sharing this encoding.
- *
- * Source: https://github.com/KEINOS/mb_levenshtein
- *
- * @param string $str UTF-8 string to be converted to extended ASCII.
- * @return string Mapped borken string.
- */
- private static function to_ascii_remap_intern(string $str, array &$map): string
- {
- // find all utf-8 characters
- $matches = [];
- if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
- return $str; // plain ascii string
- }
-
- // update the encoding map with the characters not already met
- $mapCount = \count($map);
- foreach ($matches[0] as $mbc) {
- if (!isset($map[$mbc])) {
- $map[$mbc] = \chr(128 + $mapCount);
- $mapCount++;
- }
- }
-
- // finally remap non-ascii characters
- return \strtr($str, $map);
- }
-
- /**
* Returns an ASCII version of the string. A set of non-ASCII characters are
* replaced with their closest ASCII counterparts, and the rest are removed
* by default. The language or locale of the source string can be supplied
@@ -826,6 +789,8 @@ final class ASCII
*
* @return string
* <p>A string that contains only ASCII characters.</p>
+ *
+ * @phpstan-param ASCII::*_LANGUAGE_CODE $language
*/
public static function to_ascii(
string $str,
@@ -839,13 +804,12 @@ final class ASCII
return '';
}
+ /** @phpstan-var ASCII::*_LANGUAGE_CODE - hack for phpstan */
$language = self::get_language($language);
static $EXTRA_SYMBOLS_CACHE = null;
- /**
- * @var array<string,array<string,string>>
- */
+ /** @var array<string,array<string,string>> */
static $REPLACE_HELPER_CACHE = [];
$cacheKey = $language . '-' . $replace_extra_symbols;
@@ -1019,7 +983,6 @@ final class ASCII
}
if ($use_transliterate) {
- /** @noinspection ArgumentEqualsDefaultValueInspection */
$str = self::to_transliterate($str, null, false);
}
@@ -1061,9 +1024,9 @@ final class ASCII
$str = (string) \preg_replace(
[
- '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
- '/[\\s]+/u', // 2) convert spaces to $fallback_char
- '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's
+ '/[^' . $fallback_char_escaped . '.\\-a-zA-Z\d\\s]/', // 1) remove un-needed chars
+ '/\s+/u', // 2) convert spaces to $fallback_char
+ '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's
],
[
'',
@@ -1098,6 +1061,8 @@ final class ASCII
*
* @return string
* <p>A string that has been converted to an URL slug.</p>
+ *
+ * @phpstan-param ASCII::*_LANGUAGE_CODE $language
*/
public static function to_slugify(
string $str,
@@ -1178,19 +1143,13 @@ final class ASCII
$unknown = '?',
bool $strict = false
): string {
- /**
- * @var array<int,string>|null
- */
+ /** @var array<int,string>|null */
static $UTF8_TO_TRANSLIT = null;
- /**
- * null|\Transliterator
- */
+ /** @var \Transliterator|null */
static $TRANSLITERATOR = null;
- /**
- * @var bool|null
- */
+ /** @var bool|null */
static $SUPPORT_INTL = null;
if ($str === '') {
@@ -1225,9 +1184,7 @@ final class ASCII
) {
if (!isset($TRANSLITERATOR)) {
// INFO: see "*-Latin" rules via "transliterator_list_ids()"
- /**
- * @var \Transliterator
- */
+ /** @var \Transliterator */
$TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;');
}
@@ -1377,6 +1334,50 @@ final class ASCII
}
/**
+ * WARNING: This method will return broken characters and is only for special cases.
+ *
+ * Convert a UTF-8 encoded string to a single-byte string suitable for
+ * functions that need the same string length after the conversion.
+ *
+ * The function simply uses (and updates) a tailored dynamic encoding
+ * (in/out map parameter) where non-ascii characters are remapped to
+ * the range [128-255] in order of appearance.
+ *
+ * Thus, it supports up to 128 different multibyte code points max over
+ * the whole set of strings sharing this encoding.
+ *
+ * Source: https://github.com/KEINOS/mb_levenshtein
+ *
+ * @param string $str <p>UTF-8 string to be converted to extended ASCII.</p>
+ * @param array $map <p>Internal-Map of code points to ASCII characters.</p>
+ *
+ * @return string
+ * <p>Mapped broken string.</p>
+ *
+ * @phpstan-param array<string, string> $map
+ */
+ private static function to_ascii_remap_intern(string $str, array &$map): string
+ {
+ // collect every multibyte UTF-8 sequence: a lead byte \xC0-\xF7 followed by one or more continuation bytes \x80-\xBF
+ $matches = [];
+ if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
+ return $str; // no match (0) or regex failure (false): the input is returned untouched and $map is not modified
+ }
+
+ // give each sequence not yet in $map the next single byte, numbered from 128 + the current map size ($map is updated by reference)
+ $mapCount = \count($map);
+ foreach ($matches[0] as $mbc) {
+ if (!isset($map[$mbc])) {
+ $map[$mbc] = \chr(128 + $mapCount); // NOTE: chr() wraps modulo 256, so entries past the 128th land back in the ASCII range
+ ++$mapCount;
+ }
+ }
+
+ // replace every sequence recorded in $map with its single-byte stand-in
+ return \strtr($str, $map);
+ }
+
+ /**
* Get the language from a string.
*
* e.g.: de_at -> de_at