diff options
Diffstat (limited to 'vendor/voku')
11 files changed, 214 insertions, 348 deletions
diff --git a/vendor/voku/portable-ascii/.whitesource b/vendor/voku/portable-ascii/.whitesource deleted file mode 100644 index 55b922e8c..000000000 --- a/vendor/voku/portable-ascii/.whitesource +++ /dev/null @@ -1,12 +0,0 @@ -{ - "scanSettings": { - "baseBranches": [] - }, - "checkRunSettings": { - "vulnerableCheckRunConclusionLevel": "failure", - "displayMode": "diff" - }, - "issueSettings": { - "minSeverityLevel": "LOW" - } -}
\ No newline at end of file diff --git a/vendor/voku/portable-ascii/CHANGELOG.md b/vendor/voku/portable-ascii/CHANGELOG.md index 04b64acdd..12fc393a7 100644 --- a/vendor/voku/portable-ascii/CHANGELOG.md +++ b/vendor/voku/portable-ascii/CHANGELOG.md @@ -1,12 +1,23 @@ # Changelog -### 1.6.1 (2021-01-24) +### 2.0.1 (2022-03-08) + +- "To people of Russia": There is a war in Ukraine right now. The forces of the Russian Federation are attacking civilians. +- optimize some phpdocs + +### 2.0.0 (2022-01-24) + +- prefer "Russian - Passport (2013), ICAO" instead of "Russian - GOST 7.79-2000(B)" +- fix "Ukrainian" char-mapping (thanks to @Andr1yk0) +- fix "Persian" char-mapping (thanks to @frost-cyber) + +### 1.6.1 (2022-01-24) - revert: prefer "Russian - Passport (2013), ICAO" instead of "Russian - GOST 7.79-2000(B)" - revert: fix "Ukrainian" char-mapping (thanks to @Andr1yk0) - revert: fix "Persian" char-mapping (thanks to @frost-cyber) -### 1.6.0 (2021-01-24) +### 1.6.0 (2022-01-24) - prefer "Russian - Passport (2013), ICAO" instead of "Russian - GOST 7.79-2000(B)" - fix "Ukrainian" char-mapping (thanks to @Andr1yk0) diff --git a/vendor/voku/portable-ascii/README.md b/vendor/voku/portable-ascii/README.md index 929c21e29..3ce36d604 100644 --- a/vendor/voku/portable-ascii/README.md +++ b/vendor/voku/portable-ascii/README.md @@ -1,4 +1,6 @@ [//]: # (AUTO-GENERATED BY "PHP README Helper": base file -> docs/base.md) +[![SWUbanner](https://raw.githubusercontent.com/vshymanskyy/StandWithUkraine/main/banner2-direct.svg)](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md) + [![Build Status](https://github.com/voku/portable-ascii/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/voku/portable-ascii/actions) [![Build status](https://ci.appveyor.com/api/projects/status/gnejjnk7qplr7f5t/branch/master?svg=true)](https://ci.appveyor.com/project/voku/portable-ascii/branch/master) [![codecov.io](https://codecov.io/github/voku/portable-ascii/coverage.svg?branch=master)](https://codecov.io/github/voku/portable-ascii?branch=master) @@ -99,8 +101,9 @@ The API from the "ASCII"-Class is written as small static methods. </td></tr><tr><td><a href="#normalize_whitespacestring-str-bool-keepnonbreakingspace-bool-keepbidiunicodecontrols-bool-normalize_control_characters-string">normalize_whitespace</a> </td><td><a href="#remove_invisible_charactersstring-str-bool-url_encoded-string-replacement-bool-keep_basic_control_characters-string">remove_invisible_characters</a> </td><td><a href="#to_asciistring-str-string-language-bool-remove_unsupported_chars-bool-replace_extra_symbols-bool-use_transliterate-boolnull-replace_single_chars_only-string">to_ascii</a> -</td><td><a href="#to_filenamestring-str-bool-use_transliterate-string-fallback_char-string">to_filename</a> -</td></tr><tr><td><a href="#to_slugifystring-str-string-separator-string-language-string-replacements-bool-replace_extra_symbols-bool-use_str_to_lower-bool-use_transliterate-string">to_slugify</a> +</td><td><a href="#to_ascii_remapstring-str1-string-str2-string">to_ascii_remap</a> +</td></tr><tr><td><a href="#to_filenamestring-str-bool-use_transliterate-string-fallback_char-string">to_filename</a> +</td><td><a href="#to_slugifystring-str-string-separator-string-language-string-replacements-bool-replace_extra_symbols-bool-use_str_to_lower-bool-use_transliterate-string">to_slugify</a> </td><td><a href="#to_transliteratestring-str-stringnull-unknown-bool-strict-string">to_transliterate</a> </td></tr></table> @@ -152,7 +155,7 @@ echo $array['orig'][$tmpKey]; // 'ё' </code> **Parameters:** -- `string $language [optional] <p>Language of the source string e.g.: en, de_at, or de-ch. +- `ASCII::* $language [optional] <p>Language of the source string e.g.: en, de_at, or de-ch. (default is 'en') | ASCII::*_LANGUAGE_CODE</p>` - `bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>` - `bool $asOrigReplaceArray [optional] <p>TRUE === return {orig: string[], replace: string[]} @@ -308,7 +311,7 @@ ASCII::to_ascii('�Düsseldorf�', 'en'); // Dusseldorf **Parameters:** - `string $str <p>The input string.</p>` -- `string $language [optional] <p>Language of the source string. +- `ASCII::* $language [optional] <p>Language of the source string. (default is 'en') | ASCII::*_LANGUAGE_CODE</p>` - `bool $remove_unsupported_chars [optional] <p>Whether or not to remove the unsupported characters.</p>` @@ -325,6 +328,26 @@ language</p>` -------- +#### to_ascii_remap(string $str1, string $str2): string[] +<a href="#voku-php-readme-class-methods">↑</a> +WARNING: This method will return broken characters and is only for special cases. + +Convert two UTF-8 encoded string to a single-byte strings suitable for +functions that need the same string length after the conversion. + +The function simply uses (and updates) a tailored dynamic encoding +(in/out map parameter) where non-ascii characters are remapped to +the range [128-255] in order of appearance. + +**Parameters:** +- `string $str1` +- `string $str2` + +**Return:** +- `string[]` + +-------- + #### to_filename(string $str, bool $use_transliterate, string $fallback_char): string <a href="#voku-php-readme-class-methods">↑</a> Convert given string to safe filename (and keep string case). @@ -356,7 +379,7 @@ also be supplied for language-specific transliteration. **Parameters:** - `string $str` - `string $separator [optional] <p>The string used to replace whitespace.</p>` -- `string $language [optional] <p>Language of the source string. +- `ASCII::* $language [optional] <p>Language of the source string. (default is 'en') | ASCII::*_LANGUAGE_CODE</p>` - `array<string, string> $replacements [optional] <p>A map of replaceable strings.</p>` - `bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " diff --git a/vendor/voku/portable-ascii/build/composer.json b/vendor/voku/portable-ascii/build/composer.json deleted file mode 100644 index 30f30c3cc..000000000 --- a/vendor/voku/portable-ascii/build/composer.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "require-dev": { - "voku/php-readme-helper": "~0.6" - } -} diff --git a/vendor/voku/portable-ascii/build/docs/base.md b/vendor/voku/portable-ascii/build/docs/base.md deleted file mode 100644 index ca949d8b6..000000000 --- a/vendor/voku/portable-ascii/build/docs/base.md +++ /dev/null @@ -1,127 +0,0 @@ -[![Build Status](https://github.com/voku/portable-ascii/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/voku/portable-ascii/actions) -[![Build status](https://ci.appveyor.com/api/projects/status/gnejjnk7qplr7f5t/branch/master?svg=true)](https://ci.appveyor.com/project/voku/portable-ascii/branch/master) -[![codecov.io](https://codecov.io/github/voku/portable-ascii/coverage.svg?branch=master)](https://codecov.io/github/voku/portable-ascii?branch=master) -[![Codacy Badge](https://api.codacy.com/project/badge/Grade/997c9bb10d1c4791967bdf2e42013e8e)](https://www.codacy.com/app/voku/portable-ascii) -[![Latest Stable Version](https://poser.pugx.org/voku/portable-ascii/v/stable)](https://packagist.org/packages/voku/portable-ascii) -[![Total Downloads](https://poser.pugx.org/voku/portable-ascii/downloads)](https://packagist.org/packages/voku/portable-ascii) -[![License](https://poser.pugx.org/voku/portable-ascii/license)](https://packagist.org/packages/voku/portable-ascii) -[![Donate to this project using Paypal](https://img.shields.io/badge/paypal-donate-yellow.svg)](https://www.paypal.me/moelleken) -[![Donate to this project using Patreon](https://img.shields.io/badge/patreon-donate-yellow.svg)](https://www.patreon.com/voku) - -# 🔡 Portable ASCII - -## Description - -It is written in PHP (PHP 7+) and can work without "mbstring", "iconv" or any other extra encoding php-extension on your server. - -The benefit of Portable ASCII is that it is easy to use, easy to bundle. - -The project based on ... -+ Sean M. Burke's work (https://metacpan.org/pod/Text::Unidecode) -+ Tomaz Solc's work (https://pypi.org/project/Unidecode/) -+ Portable UTF-8 work (https://github.com/voku/portable-utf8) -+ Daniel St. Jules's work (https://github.com/danielstjules/Stringy) -+ Johnny Broadway's work (https://github.com/jbroadway/urlify) -+ and many cherry-picks from "github"-gists and "Stack Overflow"-snippets ... - -## Index - -* [Alternative](#alternative) -* [Install](#install-portable-ascii-via-composer-require) -* [Why Portable ASCII?](#why-portable-ascii) -* [Requirements and Recommendations](#requirements-and-recommendations) -* [Usage](#usage) -* [Class methods](#class-methods) -* [Unit Test](#unit-test) -* [License and Copyright](#license-and-copyright) - -## Alternative - -If you like a more Object Oriented Way to edit strings, then you can take a look at [voku/Stringy](https://github.com/voku/Stringy), it's a fork of "danielstjules/Stringy" but it used the "Portable ASCII"-Class and some extra methods. - -```php -// Portable ASCII -use voku\helper\ASCII; -ASCII::to_transliterate('déjà σσς iıii'); // 'deja sss iiii' - -// voku/Stringy -use Stringy\Stringy as S; -$stringy = S::create('déjà σσς iıii'); -$stringy->toTransliterate(); // 'deja sss iiii' -``` - -## Install "Portable ASCII" via "composer require" -```shell -composer require voku/portable-ascii -``` - -## Why Portable ASCII?[]() -I need ASCII char handling in different classes and before I added this functions into "Portable UTF-8", -but this repo is more modular and portable, because it has no dependencies. - -## Requirements and Recommendations - -* No extensions are required to run this library. Portable ASCII only needs PCRE library that is available by default since PHP 4.2.0 and cannot be disabled since PHP 5.3.0. "\u" modifier support in PCRE for ASCII handling is not a must. -* PHP 7.0 is the minimum requirement -* PHP 8.0 is also supported - -## Usage - -Example: ASCII::to_ascii() -```php - echo ASCII::to_ascii('�Düsseldorf�', 'de'); - - // will output - // Duesseldorf - - echo ASCII::to_ascii('�Düsseldorf�', 'en'); - - // will output - // Dusseldorf -``` - -# Portable ASCII | API - -The API from the "ASCII"-Class is written as small static methods. - - -## Class methods - -%__functions_index__voku\helper\ASCII__% - -%__functions_list__voku\helper\ASCII__% - - -## Unit Test - -1) [Composer](https://getcomposer.org) is a prerequisite for running the tests. - -``` -composer install -``` - -2) The tests can be executed by running this command from the root directory: - -```bash -./vendor/bin/phpunit -``` - -### Support - -For support and donations please visit [Github](https://github.com/voku/portable-ascii/) | [Issues](https://github.com/voku/portable-ascii/issues) | [PayPal](https://paypal.me/moelleken) | [Patreon](https://www.patreon.com/voku). - -For status updates and release announcements please visit [Releases](https://github.com/voku/portable-ascii/releases) | [Twitter](https://twitter.com/suckup_de) | [Patreon](https://www.patreon.com/voku/posts). - -For professional support please contact [me](https://about.me/voku). - -### Thanks - -- Thanks to [GitHub](https://github.com) (Microsoft) for hosting the code and a good infrastructure including Issues-Managment, etc. -- Thanks to [IntelliJ](https://www.jetbrains.com) as they make the best IDEs for PHP and they gave me an open source license for PhpStorm! -- Thanks to [Travis CI](https://travis-ci.com/) for being the most awesome, easiest continous integration tool out there! -- Thanks to [StyleCI](https://styleci.io/) for the simple but powerful code style check. -- Thanks to [PHPStan](https://github.com/phpstan/phpstan) && [Psalm](https://github.com/vimeo/psalm) for really great Static analysis tools and for discover bugs in the code! - -### License and Copyright - -Released under the MIT License - see `LICENSE.txt` for details. diff --git a/vendor/voku/portable-ascii/build/generate_docs.php b/vendor/voku/portable-ascii/build/generate_docs.php deleted file mode 100644 index c86f1f195..000000000 --- a/vendor/voku/portable-ascii/build/generate_docs.php +++ /dev/null @@ -1,26 +0,0 @@ -<?php - -require __DIR__ . '/../vendor/autoload.php'; -require __DIR__ . '/vendor/autoload.php'; - -$readmeGenerator = new \voku\PhpReadmeHelper\GenerateApi(); -$readmeGenerator->templateMethod = <<<RAW -#### %name% -<a href="#voku-php-readme-class-methods">↑</a> -%description% - -**Parameters:** -%params% - -**Return:** -%return% - --------- - -RAW; -$readmeText = ($readmeGenerator)->generate( - __DIR__ . '/../src/voku/helper/ASCII.php', - __DIR__ . '/docs/base.md' -); - -file_put_contents(__DIR__ . '/../README.md', $readmeText); diff --git a/vendor/voku/portable-ascii/build/generate_max_key_length.php b/vendor/voku/portable-ascii/build/generate_max_key_length.php deleted file mode 100644 index 743d389f5..000000000 --- a/vendor/voku/portable-ascii/build/generate_max_key_length.php +++ /dev/null @@ -1,20 +0,0 @@ -<?php - -require __DIR__ . '/../vendor/autoload.php'; - -$languages = \voku\helper\ASCII::getAllLanguages(); - -$languagesKeyLengths = []; -foreach ($languages as $language) { - $langSpecific = \voku\helper\ASCII::charsArrayWithOneLanguage($language, false, false); - - $langSpecificKeyLength = \array_map('\mb_strlen', \array_keys($langSpecific)); - - if (count($langSpecificKeyLength) === 0) { - $languagesKeyLengths[$language] = 0; - } else { - $languagesKeyLengths[$language] = \max($langSpecificKeyLength); - } -} - -//var_export($languagesKeyLengths); diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php index aa676abda..7d8b66de7 100644 --- a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php +++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php @@ -5,6 +5,22 @@ declare(strict_types=1); namespace voku\helper; /** + * ## 🇷🇺 Русским гражданам + * В Украине сейчас идет война. Силами РФ наносятся удары по гражданской инфраструктуре в [Харькове][1], [Киеве][2], [Чернигове][3], [Сумах][4], [Ирпене][5] и десятках других городов. Гибнут люди - и гражданское население, и военные, в том числе российские призывники, которых бросили воевать. Чтобы лишить собственный народ доступа к информации, правительство РФ запретило называть войну войной, закрыло независимые СМИ и принимает сейчас ряд диктаторских законов. Эти законы призваны заткнуть рот всем, кто против войны. За обычный призыв к миру сейчас можно получить несколько лет тюрьмы. + * Не молчите! Молчание - знак вашего согласия с политикой российского правительства. + * **Вы можете сделать выбор НЕ МОЛЧАТЬ.** + * --- + * ## 🇺🇸 To people of Russia + * There is a war in Ukraine right now. The forces of the Russian Federation are attacking civilian infrastructure in [Kharkiv][1], [Kyiv][2], [Chernihiv][3], [Sumy][4], [Irpin][5] and dozens of other cities. People are dying – both civilians and military servicemen, including Russian conscripts who were thrown into the fighting. In order to deprive its own people of access to information, the government of the Russian Federation has forbidden calling a war a war, shut down independent media and is passing a number of dictatorial laws. These laws are meant to silence all those who are against war. You can be jailed for multiple years for simply calling for peace. + * Do not be silent! Silence is a sign that you accept the Russian government's policy. + * **You can choose NOT TO BE SILENT.** + * --- + * - [1] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/P7K2MSZDGFMIJPDD7CI2GIROJI.jpg "Kharkiv under attack" + * - [2] https://gdb.voanews.com/01bd0000-0aff-0242-fad0-08d9fc92c5b3_cx0_cy5_cw0_w1023_r1_s.jpg "Kyiv under attack" + * - [3] https://ichef.bbci.co.uk/news/976/cpsprodpb/163DD/production/_123510119_hi074310744.jpg "Chernihiv under attack" + * - [4] https://www.youtube.com/watch?v=8K-bkqKKf2A "Sumy under attack" + * - [5] https://cloudfront-us-east-2.images.arcpublishing.com/reuters/K4MTMLEHTRKGFK3GSKAT4GR3NE.jpg "Irpin under attack" + * * @psalm-immutable */ final class ASCII @@ -272,9 +288,7 @@ final class ASCII */ public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array { - /** - * @var array<string, array> - */ + /** @var array<string, array<string, array<int, string>>> */ static $CHARS_ARRAY = []; $cacheKey = '' . $replace_extra_symbols; @@ -329,6 +343,7 @@ final class ASCII * @return array * <p>An array of replacements.</p> * + * @phpstan-param ASCII::*_LANGUAGE_CODE $language * @phpstan-return array{orig: string[], replace: string[]}|array<string, string> */ public static function charsArrayWithOneLanguage( @@ -339,9 +354,7 @@ final class ASCII $language = self::get_language($language); // init - /** - * @var array<string, array> - */ + /** @var array<string, array<string, array<string, string>|array{orig: string[], replace: string[]}>> */ static $CHARS_ARRAY = []; $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray; @@ -432,9 +445,7 @@ final class ASCII bool $asOrigReplaceArray = true ): array { // init - /** - * @var array<string,array> - */ + /** @var array<string, array<string, string>|array{orig: string[], replace: string[]}> */ static $CHARS_ARRAY = []; $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray; @@ -460,6 +471,7 @@ final class ASCII } } + /** @phpstan-ignore-next-line - ... error? */ $CHARS_ARRAY[$cacheKey] = \array_merge([], ...$CHARS_ARRAY[$cacheKey]); if ($asOrigReplaceArray) { @@ -578,19 +590,13 @@ final class ASCII return ''; } - /** - * @var array{orig: string[], replace: string[]} - */ + /** @var array{orig: string[], replace: string[]} */ static $MSWORD_CACHE = ['orig' => [], 'replace' => []]; if (empty($MSWORD_CACHE['orig'])) { self::prepareAsciiMaps(); - /** - * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here - * - * @var array<string, string> - */ + /** @var array<string, string> */ $map = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? []; $MSWORD_CACHE = [ @@ -630,9 +636,7 @@ final class ASCII return ''; } - /** - * @var array<int,array<string,string>> - */ + /** @var array<int,array<string,string>> */ static $WHITESPACE_CACHE = []; $cacheKey = (int) $keepNonBreakingSpace; @@ -665,13 +669,11 @@ final class ASCII unset($WHITESPACE_CACHE[$cacheKey]["\xc2\xa0"]); } - $WHITESPACE_CACHE[$cacheKey] = \array_keys($WHITESPACE_CACHE[$cacheKey]); + $WHITESPACE_CACHE[$cacheKey] = array_keys($WHITESPACE_CACHE[$cacheKey]); } if (!$keepBidiUnicodeControls) { - /** - * @var array<int,string>|null - */ + /** @var array<int,string>|null */ static $BIDI_UNICODE_CONTROLS_CACHE = null; if ($BIDI_UNICODE_CONTROLS_CACHE === null) { @@ -759,45 +761,6 @@ final class ASCII } /** - * WARNING: This method will return broken characters and is only for special cases. - * - * Convert a UTF-8 encoded string to a single-byte string suitable for - * functions that need the same string length after the conversion. - * - * The function simply uses (and updates) a tailored dynamic encoding - * (in/out map parameter) where non-ascii characters are remapped to - * the range [128-255] in order of appearance. - * - * Thus, it supports up to 128 different multibyte code points max over - * the whole set of strings sharing this encoding. - * - * Source: https://github.com/KEINOS/mb_levenshtein - * - * @param string $str UTF-8 string to be converted to extended ASCII. - * @return string Mapped borken string. - */ - private static function to_ascii_remap_intern(string $str, array &$map): string - { - // find all utf-8 characters - $matches = []; - if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) { - return $str; // plain ascii string - } - - // update the encoding map with the characters not already met - $mapCount = \count($map); - foreach ($matches[0] as $mbc) { - if (!isset($map[$mbc])) { - $map[$mbc] = \chr(128 + $mapCount); - $mapCount++; - } - } - - // finally remap non-ascii characters - return \strtr($str, $map); - } - - /** * Returns an ASCII version of the string. A set of non-ASCII characters are * replaced with their closest ASCII counterparts, and the rest are removed * by default. The language or locale of the source string can be supplied @@ -826,6 +789,8 @@ final class ASCII * * @return string * <p>A string that contains only ASCII characters.</p> + * + * @phpstan-param ASCII::*_LANGUAGE_CODE $language */ public static function to_ascii( string $str, @@ -839,13 +804,12 @@ final class ASCII return ''; } + /** @phpstan-var ASCII::*_LANGUAGE_CODE - hack for phpstan */ $language = self::get_language($language); static $EXTRA_SYMBOLS_CACHE = null; - /** - * @var array<string,array<string,string>> - */ + /** @var array<string,array<string,string>> */ static $REPLACE_HELPER_CACHE = []; $cacheKey = $language . '-' . $replace_extra_symbols; @@ -1019,7 +983,6 @@ final class ASCII } if ($use_transliterate) { - /** @noinspection ArgumentEqualsDefaultValueInspection */ $str = self::to_transliterate($str, null, false); } @@ -1061,9 +1024,9 @@ final class ASCII $str = (string) \preg_replace( [ - '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars - '/[\\s]+/u', // 2) convert spaces to $fallback_char - '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's + '/[^' . $fallback_char_escaped . '.\\-a-zA-Z\d\\s]/', // 1) remove un-needed chars + '/\s+/u', // 2) convert spaces to $fallback_char + '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's ], [ '', @@ -1098,6 +1061,8 @@ final class ASCII * * @return string * <p>A string that has been converted to an URL slug.</p> + * + * @phpstan-param ASCII::*_LANGUAGE_CODE $language */ public static function to_slugify( string $str, @@ -1178,19 +1143,13 @@ final class ASCII $unknown = '?', bool $strict = false ): string { - /** - * @var array<int,string>|null - */ + /** @var array<int,string>|null */ static $UTF8_TO_TRANSLIT = null; - /** - * null|\Transliterator - */ + /** null|\Transliterator */ static $TRANSLITERATOR = null; - /** - * @var bool|null - */ + /** @var bool|null */ static $SUPPORT_INTL = null; if ($str === '') { @@ -1225,9 +1184,7 @@ final class ASCII ) { if (!isset($TRANSLITERATOR)) { // INFO: see "*-Latin" rules via "transliterator_list_ids()" - /** - * @var \Transliterator - */ + /** @var \Transliterator */ $TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;'); } @@ -1377,6 +1334,50 @@ final class ASCII } /** + * WARNING: This method will return broken characters and is only for special cases. + * + * Convert a UTF-8 encoded string to a single-byte string suitable for + * functions that need the same string length after the conversion. + * + * The function simply uses (and updates) a tailored dynamic encoding + * (in/out map parameter) where non-ascii characters are remapped to + * the range [128-255] in order of appearance. + * + * Thus, it supports up to 128 different multibyte code points max over + * the whole set of strings sharing this encoding. + * + * Source: https://github.com/KEINOS/mb_levenshtein + * + * @param string $str <p>UTF-8 string to be converted to extended ASCII.</p> + * @param array $map <p>Internal-Map of code points to ASCII characters.</p> + * + * @return string + * <p>Mapped borken string.</p> + * + * @phpstan-param array<string, string> $map + */ + private static function to_ascii_remap_intern(string $str, array &$map): string + { + // find all utf-8 characters + $matches = []; + if (!\preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) { + return $str; // plain ascii string + } + + // update the encoding map with the characters not already met + $mapCount = \count($map); + foreach ($matches[0] as $mbc) { + if (!isset($map[$mbc])) { + $map[$mbc] = \chr(128 + $mapCount); + ++$mapCount; + } + } + + // finally, remap non-ascii characters + return \strtr($str, $map); + } + + /** * Get the language from a string. * * e.g.: de_at -> de_at diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php index d51f557a1..68c3f9d25 100644 --- a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php +++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php @@ -1060,9 +1060,9 @@ return [ 'Я' => 'Ya', 'я' => 'ya', ], - // Russian - Passport (2013), ICAO + // Russian - GOST 7.79-2000(B) // -> https://en.m.wikipedia.org/wiki/Romanization_of_Russian#content-collapsible-block-1 - 'ru__passport_2013' => [ + 'ru__gost_2000_b' => [ 'А' => 'A', 'а' => 'a', 'Б' => 'B', @@ -1075,8 +1075,8 @@ return [ 'д' => 'd', 'Е' => 'E', 'е' => 'e', - 'Ё' => 'E', - 'ё' => 'e', + 'Ё' => 'Yo', + 'ё' => 'yo', 'Ж' => 'Zh', 'ж' => 'zh', 'З' => 'Z', @@ -1107,42 +1107,42 @@ return [ 'у' => 'u', 'Ф' => 'F', 'ф' => 'f', - 'Х' => 'Kh', - 'х' => 'kh', - 'Ц' => 'Ts', - 'ц' => 'ts', + 'Х' => 'X', + 'х' => 'x', + 'Ц' => 'Cz', + 'ц' => 'cz', 'Ч' => 'Ch', 'ч' => 'ch', 'ш' => 'sh', 'Ш' => 'Sh', - 'Щ' => 'Shch', - 'щ' => 'shch', - 'Ъ' => 'Ie', - 'ъ' => 'ie', - 'Ы' => 'Y', - 'ы' => 'y', + 'Щ' => 'Shh', + 'щ' => 'shh', + 'Ъ' => '', + 'ъ' => '', + 'Ы' => 'Y\'', + 'ы' => 'y\'', 'Ь' => '', 'ь' => '', - 'Э' => 'E', - 'э' => 'e', - 'Ю' => 'Iu', - 'ю' => 'iu', - 'Я' => 'Ia', - 'я' => 'ia', - 'І' => '', - 'і' => '', - 'Ѳ' => '', - 'ѳ' => '', - 'Ѣ' => '', - 'ѣ' => '', - 'Ѵ' => '', - 'ѵ' => '', + 'Э' => 'E\'', + 'э' => 'e\'', + 'Ю' => 'Yu', + 'ю' => 'yu', + 'Я' => 'Ya', + 'я' => 'ya', + 'І' => 'I', + 'і' => 'i', + 'Ѳ' => 'Fh', + 'ѳ' => 'fh', + 'Ѣ' => 'Ye', + 'ѣ' => 'ye', + 'Ѵ' => 'Yh', + 'ѵ' => 'yh', 'Є' => '', 'є' => '', 'Ѥ' => '', 'ѥ' => '', - 'Ѕ' => '', - 'ѕ' => '', + 'Ѕ' => 'Js', + 'ѕ' => 'js', 'Ꙋ' => '', 'ꙋ' => '', 'Ѡ' => '', @@ -1162,9 +1162,9 @@ return [ 'Ѱ' => '', 'ѱ' => '', ], - // Russian - GOST 7.79-2000(B) + // Russian - Passport (2013), ICAO // -> https://en.m.wikipedia.org/wiki/Romanization_of_Russian#content-collapsible-block-1 - 'ru__gost_2000_b' => [ + 'ru__passport_2013' => [ 'А' => 'A', 'а' => 'a', 'Б' => 'B', @@ -1177,8 +1177,8 @@ return [ 'д' => 'd', 'Е' => 'E', 'е' => 'e', - 'Ё' => 'Yo', - 'ё' => 'yo', + 'Ё' => 'E', + 'ё' => 'e', 'Ж' => 'Zh', 'ж' => 'zh', 'З' => 'Z', @@ -1209,42 +1209,42 @@ return [ 'у' => 'u', 'Ф' => 'F', 'ф' => 'f', - 'Х' => 'X', - 'х' => 'x', - 'Ц' => 'Cz', - 'ц' => 'cz', + 'Х' => 'Kh', + 'х' => 'kh', + 'Ц' => 'Ts', + 'ц' => 'ts', 'Ч' => 'Ch', 'ч' => 'ch', 'ш' => 'sh', 'Ш' => 'Sh', - 'Щ' => 'Shh', - 'щ' => 'shh', - 'Ъ' => '', - 'ъ' => '', - 'Ы' => 'Y\'', - 'ы' => 'y\'', + 'Щ' => 'Shch', + 'щ' => 'shch', + 'Ъ' => 'Ie', + 'ъ' => 'ie', + 'Ы' => 'Y', + 'ы' => 'y', 'Ь' => '', 'ь' => '', - 'Э' => 'E\'', - 'э' => 'e\'', - 'Ю' => 'Yu', - 'ю' => 'yu', - 'Я' => 'Ya', - 'я' => 'ya', - 'І' => 'I', - 'і' => 'i', - 'Ѳ' => 'Fh', - 'ѳ' => 'fh', - 'Ѣ' => 'Ye', - 'ѣ' => 'ye', - 'Ѵ' => 'Yh', - 'ѵ' => 'yh', + 'Э' => 'E', + 'э' => 'e', + 'Ю' => 'Iu', + 'ю' => 'iu', + 'Я' => 'Ia', + 'я' => 'ia', + 'І' => '', + 'і' => '', + 'Ѳ' => '', + 'ѳ' => '', + 'Ѣ' => '', + 'ѣ' => '', + 'Ѵ' => '', + 'ѵ' => '', 'Є' => '', 'є' => '', 'Ѥ' => '', 'ѥ' => '', - 'Ѕ' => 'Js', - 'ѕ' => 'js', + 'Ѕ' => '', + 'ѕ' => '', 'Ꙋ' => '', 'ꙋ' => '', 'Ѡ' => '', @@ -1265,15 +1265,32 @@ return [ 'ѱ' => '', ], // Ukrainian + // -> https://zakon.rada.gov.ua/laws/show/55-2010-%D0%BF?lang=en 'uk' => [ + 'Г' => 'H', + 'г' => 'h', + 'Ґ' => 'G', + 'ґ' => 'g', 'Є' => 'Ye', 'є' => 'ye', + 'И' => 'Y', + 'и' => 'y', 'І' => 'I', 'і' => 'i', 'Ї' => 'Yi', 'ї' => 'yi', - 'Ґ' => 'G', - 'ґ' => 'g', + 'Й' => 'Y', + 'й' => 'y', + 'Х' => 'Kh', + 'х' => 'kh', + 'Ц' => 'Ts', + 'ц' => 'ts', + 'Ч' => 'Ch', + 'ч' => 'ch', + 'Ш' => 'Sh', + 'ш' => 'sh', + 'Щ' => 'Shch', + 'щ' => 'shch', ], // Kazakh 'kk' => [ @@ -1602,10 +1619,10 @@ return [ 'fa' => [ 'ا' => 'a', 'ب' => 'b', - 'پ' => 'b', + 'پ' => 'p', 'ت' => 't', 'ث' => 's', - 'ج' => 'g', + 'ج' => 'j', 'چ' => 'ch', 'ح' => 'h', 'خ' => 'kh', @@ -2555,6 +2572,8 @@ return [ 'j̄' => 'j', 'J̃' => 'J', 'j̃' => 'j', + 'Й' => 'i', + 'й' => 'i', 'ĸ' => 'k', 'Ĺ' => 'L', 'Ľ' => 'L', @@ -2810,6 +2829,8 @@ return [ 'ȳ' => 'y', 'Ỹ' => 'Y', 'ỹ' => 'y', + 'Щ' => 'Shh', + 'щ' => 'shh', 'Ź' => 'Z', 'ź' => 'z', 'Z̀' => 'Z', diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php index 426d84a4d..afe31ae2c 100644 --- a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php +++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_extras_by_languages.php @@ -199,8 +199,8 @@ return [ '&' => ' i ', '+' => ' plus ', ], - // Russian - Passport (2013), ICAO - 'ru__passport_2013' => [ + // Russian - GOST 7.79-2000(B) + 'ru__gost_2000_b' => [ '=' => ' ravnyj ', '%' => ' procent ', '∑' => ' summa ', @@ -210,8 +210,8 @@ return [ '&' => ' i ', '+' => ' plus ', ], - // Russian - GOST 7.79-2000(B) - 'ru__gost_2000_b' => [ + // Russian - Passport (2013), ICAO + 'ru__passport_2013' => [ '=' => ' ravnyj ', '%' => ' procent ', '∑' => ' summa ', diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php index a6345f213..da81ae236 100644 --- a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php +++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_language_max_key.php @@ -31,8 +31,8 @@ return [ 'fi' => 1, 'ka' => 1, 'ru' => 1, - 'ru__passport_2013' => 1, 'ru__gost_2000_b' => 1, + 'ru__passport_2013' => 1, 'uk' => 1, 'kk' => 1, 'cs' => 1, |