aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/voku/portable-ascii/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/voku/portable-ascii/src')
-rw-r--r-- vendor/voku/portable-ascii/src/voku/helper/ASCII.php 165
-rw-r--r-- vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php 149
-rw-r--r-- vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php 8
-rw-r--r-- vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php 2
4 files changed, 151 insertions, 173 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
index 7d8b66de7..aa676abda 100644
--- a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
+++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
@@ -5,22 +5,6 @@ declare(strict_types=1);
namespace voku\helper;
/**
- * ## 🇷🇺 Русским гражданам
- * В Украине сейчас идет война. Силами РФ наносятся удары по гражданской инфраструктуре в [Харькове][1], [Киеве][2], [Чернигове][3], [Сумах][4], [Ирпене][5] и десятках других городов. Гибнут люди - и гражданское население, и военные, в том числе российские призывники, которых бросили воевать. Чтобы лишить собственный народ доступа к информации, правительство РФ запретило называть войну войной, закрыло независимые СМИ и принимает сейчас ряд диктаторских законов. Эти законы призваны заткнуть рот всем, кто против войны. За обычный призыв к миру сейчас можно получить несколько лет тюрьмы.
- * Не молчите! Молчание - знак вашего согласия с политикой российского правительства.
- * **Вы можете сделать выбор НЕ МОЛЧАТЬ.**
- * ---
- * ## 🇺🇸 To people of Russia
- * There is a war in Ukraine right now. The forces of the Russian Federation are attacking civilian infrastructure in [Kharkiv][1], [Kyiv][2], [Chernihiv][3], [Sumy][4], [Irpin][5] and dozens of other cities. People are dying – both civilians and military servicemen, including Russian conscripts who were thrown into the fighting. In order to deprive its own people of access to information, the government of the Russian Federation has forbidden calling a war a war, shut down independent media and is passing a number of dictatorial laws. These laws are meant to silence all those who are against war. You can be jailed for multiple years for simply calling for peace.
- * Do not be silent! Silence is a sign that you accept the Russian government's policy.
- * **You can choose NOT TO BE SILENT.**
- * ---
- * - [1] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/P7K2MSZDGFMIJPDD7CI2GIROJI.jpg "Kharkiv under attack"
- * - [2] https://gdb.voanews.com/01bd0000-0aff-0242-fad0-08d9fc92c5b3_cx0_cy5_cw0_w1023_r1_s.jpg "Kyiv under attack"
- * - [3] https://ichef.bbci.co.uk/news/976/cpsprodpb/163DD/production/_123510119_hi074310744.jpg "Chernihiv under attack"
- * - [4] https://www.youtube.com/watch?v=8K-bkqKKf2A "Sumy under attack"
- * - [5] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/K4MTMLEHTRKGFK3GSKAT4GR3NE.jpg "Irpin under attack"
- *
* @psalm-immutable
*/
final class ASCII
@@ -288,7 +272,9 @@ final class ASCII
*/
public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array
{
- /** @var array<string, array<string, array<int, string>>> */
+ /**
+ * @var array<string, array>
+ */
static $CHARS_ARRAY = [];
$cacheKey = '' . $replace_extra_symbols;
@@ -343,7 +329,6 @@ final class ASCII
* @return array
* <p>An array of replacements.</p>
*
- * @phpstan-param ASCII::*_LANGUAGE_CODE $language
* @phpstan-return array{orig: string[], replace: string[]}|array<string, string>
*/
public static function charsArrayWithOneLanguage(
@@ -354,7 +339,9 @@ final class ASCII
$language = self::get_language($language);
// init
- /** @var array<string, array<string, array<string, string>|array{orig: string[], replace: string[]}>> */
+ /**
+ * @var array<string, array>
+ */
static $CHARS_ARRAY = [];
$cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
@@ -445,7 +432,9 @@ final class ASCII
bool $asOrigReplaceArray = true
): array {
// init
- /** @var array<string, array<string, string>|array{orig: string[], replace: string[]}> */
+ /**
+ * @var array<string,array>
+ */
static $CHARS_ARRAY = [];
$cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
@@ -471,7 +460,6 @@ final class ASCII
}
}
- /** @phpstan-ignore-next-line - ... error? */
$CHARS_ARRAY[$cacheKey] = \array_merge([], ...$CHARS_ARRAY[$cacheKey]);
if ($asOrigReplaceArray) {
@@ -590,13 +578,19 @@ final class ASCII
return '';
}
- /** @var array{orig: string[], replace: string[]} */
+ /**
+ * @var array{orig: string[], replace: string[]}
+ */
static $MSWORD_CACHE = ['orig' => [], 'replace' => []];
if (empty($MSWORD_CACHE['orig'])) {
self::prepareAsciiMaps();
- /** @var array<string, string> */
+ /**
+ * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here
+ *
+ * @var array<string, string>
+ */
$map = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? [];
$MSWORD_CACHE = [
@@ -636,7 +630,9 @@ final class ASCII
return '';
}
- /** @var array<int,array<string,string>> */
+ /**
+ * @var array<int,array<string,string>>
+ */
static $WHITESPACE_CACHE = [];
$cacheKey = (int) $keepNonBreakingSpace;
@@ -669,11 +665,13 @@ final class ASCII
unset($WHITESPACE_CACHE[$cacheKey]["\xc2\xa0"]);
}
- $WHITESPACE_CACHE[$cacheKey] = array_keys($WHITESPACE_CACHE[$cacheKey]);
+ $WHITESPACE_CACHE[$cacheKey] = \array_keys($WHITESPACE_CACHE[$cacheKey]);
}
if (!$keepBidiUnicodeControls) {
- /** @var array<int,string>|null */
+ /**
+ * @var array<int,string>|null
+ */
static $BIDI_UNICODE_CONTROLS_CACHE = null;
if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
@@ -761,6 +759,45 @@ final class ASCII
}
/**
+ * WARNING: This method will return broken characters and is only for special cases.
+ *
+ * Convert a UTF-8 encoded string to a single-byte string suitable for
+ * functions that need the same string length after the conversion.
+ *
+ * The function simply uses (and updates) a tailored dynamic encoding
+ * (in/out map parameter) where non-ascii characters are remapped to
+ * the range [128-255] in order of appearance.
+ *
+ * Thus, it supports up to 128 different multibyte code points max over
+ * the whole set of strings sharing this encoding.
+ *
+ * Source: https://github.com/KEINOS/mb_levenshtein
+ *
+ * @param string $str UTF-8 string to be converted to extended ASCII.
+ * @param array $map In/out map of multibyte characters to single bytes; updated in place.
+ * @return string Mapped broken string.
+ */
+ private static function to_ascii_remap_intern(string $str, array &$map): string
+ {
+ // find all utf-8 characters
+ $matches = [];
+ if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
+ return $str; // plain ascii string
+ }
+
+ // update the encoding map with the characters not already met
+ $mapCount = \count($map);
+ foreach ($matches[0] as $mbc) {
+ if (!isset($map[$mbc])) {
+ $map[$mbc] = \chr(128 + $mapCount);
+ $mapCount++;
+ }
+ }
+
+ // finally remap non-ascii characters
+ return \strtr($str, $map);
+ }
+
+ /**
* Returns an ASCII version of the string. A set of non-ASCII characters are
* replaced with their closest ASCII counterparts, and the rest are removed
* by default. The language or locale of the source string can be supplied
@@ -789,8 +826,6 @@ final class ASCII
*
* @return string
* <p>A string that contains only ASCII characters.</p>
- *
- * @phpstan-param ASCII::*_LANGUAGE_CODE $language
*/
public static function to_ascii(
string $str,
@@ -804,12 +839,13 @@ final class ASCII
return '';
}
- /** @phpstan-var ASCII::*_LANGUAGE_CODE - hack for phpstan */
$language = self::get_language($language);
static $EXTRA_SYMBOLS_CACHE = null;
- /** @var array<string,array<string,string>> */
+ /**
+ * @var array<string,array<string,string>>
+ */
static $REPLACE_HELPER_CACHE = [];
$cacheKey = $language . '-' . $replace_extra_symbols;
@@ -983,6 +1019,7 @@ final class ASCII
}
if ($use_transliterate) {
+ /** @noinspection ArgumentEqualsDefaultValueInspection */
$str = self::to_transliterate($str, null, false);
}
@@ -1024,9 +1061,9 @@ final class ASCII
$str = (string) \preg_replace(
[
- '/[^' . $fallback_char_escaped . '.\\-a-zA-Z\d\\s]/', // 1) remove un-needed chars
- '/\s+/u', // 2) convert spaces to $fallback_char
- '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's
+ '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
+ '/[\\s]+/u', // 2) convert spaces to $fallback_char
+ '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's
],
[
'',
@@ -1061,8 +1098,6 @@ final class ASCII
*
* @return string
* <p>A string that has been converted to an URL slug.</p>
- *
- * @phpstan-param ASCII::*_LANGUAGE_CODE $language
*/
public static function to_slugify(
string $str,
@@ -1143,13 +1178,19 @@ final class ASCII
$unknown = '?',
bool $strict = false
): string {
- /** @var array<int,string>|null */
+ /**
+ * @var array<int,string>|null
+ */
static $UTF8_TO_TRANSLIT = null;
- /** null|\Transliterator */
+ /**
+ * @var null|\Transliterator
+ */
static $TRANSLITERATOR = null;
- /** @var bool|null */
+ /**
+ * @var bool|null
+ */
static $SUPPORT_INTL = null;
if ($str === '') {
@@ -1184,7 +1225,9 @@ final class ASCII
) {
if (!isset($TRANSLITERATOR)) {
// INFO: see "*-Latin" rules via "transliterator_list_ids()"
- /** @var \Transliterator */
+ /**
+ * @var \Transliterator
+ */
$TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;');
}
@@ -1334,50 +1377,6 @@ final class ASCII
}
/**
- * WARNING: This method will return broken characters and is only for special cases.
- *
- * Convert a UTF-8 encoded string to a single-byte string suitable for
- * functions that need the same string length after the conversion.
- *
- * The function simply uses (and updates) a tailored dynamic encoding
- * (in/out map parameter) where non-ascii characters are remapped to
- * the range [128-255] in order of appearance.
- *
- * Thus, it supports up to 128 different multibyte code points max over
- * the whole set of strings sharing this encoding.
- *
- * Source: https://github.com/KEINOS/mb_levenshtein
- *
- * @param string $str <p>UTF-8 string to be converted to extended ASCII.</p>
- * @param array $map <p>Internal-Map of code points to ASCII characters.</p>
- *
- * @return string
- * <p>Mapped borken string.</p>
- *
- * @phpstan-param array<string, string> $map
- */
- private static function to_ascii_remap_intern(string $str, array &$map): string
- {
- // find all utf-8 characters
- $matches = [];
- if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
- return $str; // plain ascii string
- }
-
- // update the encoding map with the characters not already met
- $mapCount = \count($map);
- foreach ($matches[0] as $mbc) {
- if (!isset($map[$mbc])) {
- $map[$mbc] = \chr(128 + $mapCount);
- ++$mapCount;
- }
- }
-
- // finally, remap non-ascii characters
- return \strtr($str, $map);
- }
-
- /**
* Get the language from a string.
*
* e.g.: de_at -> de_at
diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php
index 68c3f9d25..d51f557a1 100644
--- a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php
+++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php
@@ -1060,9 +1060,9 @@ return [
'Я' => 'Ya',
'я' => 'ya',
],
- // Russian - GOST 7.79-2000(B)
+ // Russian - Passport (2013), ICAO
// -> https://en.m.wikipedia.org/wiki/Romanization_of_Russian#content-collapsible-block-1
- 'ru__gost_2000_b' => [
+ 'ru__passport_2013' => [
'А' => 'A',
'а' => 'a',
'Б' => 'B',
@@ -1075,8 +1075,8 @@ return [
'д' => 'd',
'Е' => 'E',
'е' => 'e',
- 'Ё' => 'Yo',
- 'ё' => 'yo',
+ 'Ё' => 'E',
+ 'ё' => 'e',
'Ж' => 'Zh',
'ж' => 'zh',
'З' => 'Z',
@@ -1107,42 +1107,42 @@ return [
'у' => 'u',
'Ф' => 'F',
'ф' => 'f',
- 'Х' => 'X',
- 'х' => 'x',
- 'Ц' => 'Cz',
- 'ц' => 'cz',
+ 'Х' => 'Kh',
+ 'х' => 'kh',
+ 'Ц' => 'Ts',
+ 'ц' => 'ts',
'Ч' => 'Ch',
'ч' => 'ch',
'ш' => 'sh',
'Ш' => 'Sh',
- 'Щ' => 'Shh',
- 'щ' => 'shh',
- 'Ъ' => '',
- 'ъ' => '',
- 'Ы' => 'Y\'',
- 'ы' => 'y\'',
+ 'Щ' => 'Shch',
+ 'щ' => 'shch',
+ 'Ъ' => 'Ie',
+ 'ъ' => 'ie',
+ 'Ы' => 'Y',
+ 'ы' => 'y',
'Ь' => '',
'ь' => '',
- 'Э' => 'E\'',
- 'э' => 'e\'',
- 'Ю' => 'Yu',
- 'ю' => 'yu',
- 'Я' => 'Ya',
- 'я' => 'ya',
- 'І' => 'I',
- 'і' => 'i',
- 'Ѳ' => 'Fh',
- 'ѳ' => 'fh',
- 'Ѣ' => 'Ye',
- 'ѣ' => 'ye',
- 'Ѵ' => 'Yh',
- 'ѵ' => 'yh',
+ 'Э' => 'E',
+ 'э' => 'e',
+ 'Ю' => 'Iu',
+ 'ю' => 'iu',
+ 'Я' => 'Ia',
+ 'я' => 'ia',
+ 'І' => '',
+ 'і' => '',
+ 'Ѳ' => '',
+ 'ѳ' => '',
+ 'Ѣ' => '',
+ 'ѣ' => '',
+ 'Ѵ' => '',
+ 'ѵ' => '',
'Є' => '',
'є' => '',
'Ѥ' => '',
'ѥ' => '',
- 'Ѕ' => 'Js',
- 'ѕ' => 'js',
+ 'Ѕ' => '',
+ 'ѕ' => '',
'Ꙋ' => '',
'ꙋ' => '',
'Ѡ' => '',
@@ -1162,9 +1162,9 @@ return [
'Ѱ' => '',
'ѱ' => '',
],
- // Russian - Passport (2013), ICAO
+ // Russian - GOST 7.79-2000(B)
// -> https://en.m.wikipedia.org/wiki/Romanization_of_Russian#content-collapsible-block-1
- 'ru__passport_2013' => [
+ 'ru__gost_2000_b' => [
'А' => 'A',
'а' => 'a',
'Б' => 'B',
@@ -1177,8 +1177,8 @@ return [
'д' => 'd',
'Е' => 'E',
'е' => 'e',
- 'Ё' => 'E',
- 'ё' => 'e',
+ 'Ё' => 'Yo',
+ 'ё' => 'yo',
'Ж' => 'Zh',
'ж' => 'zh',
'З' => 'Z',
@@ -1209,42 +1209,42 @@ return [
'у' => 'u',
'Ф' => 'F',
'ф' => 'f',
- 'Х' => 'Kh',
- 'х' => 'kh',
- 'Ц' => 'Ts',
- 'ц' => 'ts',
+ 'Х' => 'X',
+ 'х' => 'x',
+ 'Ц' => 'Cz',
+ 'ц' => 'cz',
'Ч' => 'Ch',
'ч' => 'ch',
'ш' => 'sh',
'Ш' => 'Sh',
- 'Щ' => 'Shch',
- 'щ' => 'shch',
- 'Ъ' => 'Ie',
- 'ъ' => 'ie',
- 'Ы' => 'Y',
- 'ы' => 'y',
+ 'Щ' => 'Shh',
+ 'щ' => 'shh',
+ 'Ъ' => '',
+ 'ъ' => '',
+ 'Ы' => 'Y\'',
+ 'ы' => 'y\'',
'Ь' => '',
'ь' => '',
- 'Э' => 'E',
- 'э' => 'e',
- 'Ю' => 'Iu',
- 'ю' => 'iu',
- 'Я' => 'Ia',
- 'я' => 'ia',
- 'І' => '',
- 'і' => '',
- 'Ѳ' => '',
- 'ѳ' => '',
- 'Ѣ' => '',
- 'ѣ' => '',
- 'Ѵ' => '',
- 'ѵ' => '',
+ 'Э' => 'E\'',
+ 'э' => 'e\'',
+ 'Ю' => 'Yu',
+ 'ю' => 'yu',
+ 'Я' => 'Ya',
+ 'я' => 'ya',
+ 'І' => 'I',
+ 'і' => 'i',
+ 'Ѳ' => 'Fh',
+ 'ѳ' => 'fh',
+ 'Ѣ' => 'Ye',
+ 'ѣ' => 'ye',
+ 'Ѵ' => 'Yh',
+ 'ѵ' => 'yh',
'Є' => '',
'є' => '',
'Ѥ' => '',
'ѥ' => '',
- 'Ѕ' => '',
- 'ѕ' => '',
+ 'Ѕ' => 'Js',
+ 'ѕ' => 'js',
'Ꙋ' => '',
'ꙋ' => '',
'Ѡ' => '',
@@ -1265,32 +1265,15 @@ return [
'ѱ' => '',
],
// Ukrainian
- // -> https://zakon.rada.gov.ua/laws/show/55-2010-%D0%BF?lang=en
'uk' => [
- 'Г' => 'H',
- 'г' => 'h',
- 'Ґ' => 'G',
- 'ґ' => 'g',
'Є' => 'Ye',
'є' => 'ye',
- 'И' => 'Y',
- 'и' => 'y',
'І' => 'I',
'і' => 'i',
'Ї' => 'Yi',
'ї' => 'yi',
- 'Й' => 'Y',
- 'й' => 'y',
- 'Х' => 'Kh',
- 'х' => 'kh',
- 'Ц' => 'Ts',
- 'ц' => 'ts',
- 'Ч' => 'Ch',
- 'ч' => 'ch',
- 'Ш' => 'Sh',
- 'ш' => 'sh',
- 'Щ' => 'Shch',
- 'щ' => 'shch',
+ 'Ґ' => 'G',
+ 'ґ' => 'g',
],
// Kazakh
'kk' => [
@@ -1619,10 +1602,10 @@ return [
'fa' => [
'ا' => 'a',
'ب' => 'b',
- 'پ' => 'p',
+ 'پ' => 'b',
'ت' => 't',
'ث' => 's',
- 'ج' => 'j',
+ 'ج' => 'g',
'چ' => 'ch',
'ح' => 'h',
'خ' => 'kh',
@@ -2572,8 +2555,6 @@ return [
'j̄' => 'j',
'J̃' => 'J',
'j̃' => 'j',
- 'Й' => 'i',
- 'й' => 'i',
'ĸ' => 'k',
'Ĺ' => 'L',
'Ľ' => 'L',
@@ -2829,8 +2810,6 @@ return [
'ȳ' => 'y',
'Ỹ' => 'Y',
'ỹ' => 'y',
- 'Щ' => 'Shh',
- 'щ' => 'shh',
'Ź' => 'Z',
'ź' => 'z',
'Z̀' => 'Z',
diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php
index afe31ae2c..426d84a4d 100644
--- a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php
+++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php
@@ -199,8 +199,8 @@ return [
'&' => ' i ',
'+' => ' plus ',
],
- // Russian - GOST 7.79-2000(B)
- 'ru__gost_2000_b' => [
+ // Russian - Passport (2013), ICAO
+ 'ru__passport_2013' => [
'=' => ' ravnyj ',
'%' => ' procent ',
'∑' => ' summa ',
@@ -210,8 +210,8 @@ return [
'&' => ' i ',
'+' => ' plus ',
],
- // Russian - Passport (2013), ICAO
- 'ru__passport_2013' => [
+ // Russian - GOST 7.79-2000(B)
+ 'ru__gost_2000_b' => [
'=' => ' ravnyj ',
'%' => ' procent ',
'∑' => ' summa ',
diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php
index da81ae236..a6345f213 100644
--- a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php
+++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php
@@ -31,8 +31,8 @@ return [
'fi' => 1,
'ka' => 1,
'ru' => 1,
- 'ru__gost_2000_b' => 1,
'ru__passport_2013' => 1,
+ 'ru__gost_2000_b' => 1,
'uk' => 1,
'kk' => 1,
'cs' => 1,