aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/voku/portable-ascii/src/voku/helper/ASCII.php')
-rw-r--r--vendor/voku/portable-ascii/src/voku/helper/ASCII.php1440
1 files changed, 1440 insertions, 0 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/ASCII.php b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
new file mode 100644
index 000000000..d4ec32ab1
--- /dev/null
+++ b/vendor/voku/portable-ascii/src/voku/helper/ASCII.php
@@ -0,0 +1,1440 @@
+<?php
+
+declare(strict_types=1);
+
+namespace voku\helper;
+
+/**
+ * @psalm-immutable
+ */
+final class ASCII
+{
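+ //
+ // INFO: language codes follow ISO 639-1, see https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+ // (variants carry a suffix, e.g. "de_at" or "ru__gost_2000_b"; the EXTRA_* constants are map keys, not languages)
+ //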
+
+ const UZBEK_LANGUAGE_CODE = 'uz';
+
+ const TURKMEN_LANGUAGE_CODE = 'tk';
+
+ const THAI_LANGUAGE_CODE = 'th';
+
+ const PASHTO_LANGUAGE_CODE = 'ps';
+
+ const ORIYA_LANGUAGE_CODE = 'or';
+
+ const MONGOLIAN_LANGUAGE_CODE = 'mn';
+
+ const KOREAN_LANGUAGE_CODE = 'ko';
+
+ const KIRGHIZ_LANGUAGE_CODE = 'ky';
+
+ const ARMENIAN_LANGUAGE_CODE = 'hy';
+
+ const BENGALI_LANGUAGE_CODE = 'bn';
+
+ const BELARUSIAN_LANGUAGE_CODE = 'be';
+
+ const AMHARIC_LANGUAGE_CODE = 'am';
+
+ const JAPANESE_LANGUAGE_CODE = 'ja';
+
+ const CHINESE_LANGUAGE_CODE = 'zh';
+
+ const DUTCH_LANGUAGE_CODE = 'nl';
+
+ const ITALIAN_LANGUAGE_CODE = 'it';
+
+ const MACEDONIAN_LANGUAGE_CODE = 'mk';
+
+ const PORTUGUESE_LANGUAGE_CODE = 'pt';
+
+ const GREEKLISH_LANGUAGE_CODE = 'el__greeklish';
+
+ const GREEK_LANGUAGE_CODE = 'el';
+
+ const HINDI_LANGUAGE_CODE = 'hi';
+
+ const SWEDISH_LANGUAGE_CODE = 'sv';
+
+ const TURKISH_LANGUAGE_CODE = 'tr';
+
+ const BULGARIAN_LANGUAGE_CODE = 'bg';
+
+ const HUNGARIAN_LANGUAGE_CODE = 'hu';
+
+ const MYANMAR_LANGUAGE_CODE = 'my';
+
+ const CROATIAN_LANGUAGE_CODE = 'hr';
+
+ const FINNISH_LANGUAGE_CODE = 'fi';
+
+ const GEORGIAN_LANGUAGE_CODE = 'ka';
+
+ const RUSSIAN_LANGUAGE_CODE = 'ru';
+
+ const RUSSIAN_PASSPORT_2013_LANGUAGE_CODE = 'ru__passport_2013';
+
+ const RUSSIAN_GOST_2000_B_LANGUAGE_CODE = 'ru__gost_2000_b';
+
+ const UKRAINIAN_LANGUAGE_CODE = 'uk';
+
+ const KAZAKH_LANGUAGE_CODE = 'kk';
+
+ const CZECH_LANGUAGE_CODE = 'cs';
+
+ const DANISH_LANGUAGE_CODE = 'da';
+
+ const POLISH_LANGUAGE_CODE = 'pl';
+
+ const ROMANIAN_LANGUAGE_CODE = 'ro';
+
+ const ESPERANTO_LANGUAGE_CODE = 'eo';
+
+ const ESTONIAN_LANGUAGE_CODE = 'et';
+
+ const LATVIAN_LANGUAGE_CODE = 'lv';
+
+ const LITHUANIAN_LANGUAGE_CODE = 'lt';
+
+ const NORWEGIAN_LANGUAGE_CODE = 'no';
+
+ const VIETNAMESE_LANGUAGE_CODE = 'vi';
+
+ const ARABIC_LANGUAGE_CODE = 'ar';
+
+ const PERSIAN_LANGUAGE_CODE = 'fa';
+
+ const SERBIAN_LANGUAGE_CODE = 'sr';
+
+ const SERBIAN_CYRILLIC_LANGUAGE_CODE = 'sr__cyr';
+
+ const SERBIAN_LATIN_LANGUAGE_CODE = 'sr__lat';
+
+ const AZERBAIJANI_LANGUAGE_CODE = 'az';
+
+ const SLOVAK_LANGUAGE_CODE = 'sk';
+
+ const FRENCH_LANGUAGE_CODE = 'fr';
+
+ const FRENCH_AUSTRIAN_LANGUAGE_CODE = 'fr_at';
+
+ const FRENCH_SWITZERLAND_LANGUAGE_CODE = 'fr_ch';
+
+ const GERMAN_LANGUAGE_CODE = 'de';
+
+ const GERMAN_AUSTRIAN_LANGUAGE_CODE = 'de_at';
+
+ const GERMAN_SWITZERLAND_LANGUAGE_CODE = 'de_ch';
+
+ const ENGLISH_LANGUAGE_CODE = 'en';
+
+ const EXTRA_LATIN_CHARS_LANGUAGE_CODE = 'latin';
+
+ const EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE = ' ';
+
+ const EXTRA_MSWORD_CHARS_LANGUAGE_CODE = 'msword';
+
+ /**
+ * Replacement maps keyed by language code (char => ASCII replacement).
+ * NULL until filled; the methods in this class call self::prepareAsciiMaps() before reading it.
+ *
+ * @var array<string, array<string, string>>|null
+ */
+ private static $ASCII_MAPS;
+
+ /**
+ * Same shape as $ASCII_MAPS, used when "$replace_extra_symbols" is requested.
+ * NULL until filled; the methods in this class call self::prepareAsciiAndExtrasMaps() before reading it.
+ *
+ * @var array<string, array<string, string>>|null
+ */
+ private static $ASCII_MAPS_AND_EXTRAS;
+
+ /**
+ * Extra symbol replacements keyed by language code; to_ascii() collects the inner keys
+ * to extend its match pattern.
+ * NOTE(review): where this gets populated is not visible from the methods that read it - confirm.
+ *
+ * @var array<string, array<string, string>>|null
+ */
+ private static $ASCII_EXTRAS;
+
+ /**
+ * Lookup table indexed by a single byte, loaded lazily via self::getData('ascii_ord')
+ * in to_transliterate().
+ *
+ * @var array<string, int>|null
+ */
+ private static $ORD;
+
+ /**
+ * Per-language maximum key length, loaded lazily via self::getData('ascii_language_max_key')
+ * in to_ascii(); decides whether 4- and 5-character sequences are looked up.
+ *
+ * @var array<string, int>|null
+ */
+ private static $LANGUAGE_MAX_KEY;
+
+ /**
+ * Character class (without delimiters) that matches everything this class does NOT treat as plain ASCII;
+ * callers wrap it in "/.../" themselves.
+ *
+ * url: https://en.wikipedia.org/wiki/Wikipedia:ASCII#ASCII_printable_characters
+ *
+ * NOTE(review): "\x10" (DLE) and "\x13" (DC3) are allowed next to "\x0A" (LF) and "\x0D" (CR);
+ * they look like decimal 10 / 13 written as hex - confirm the intent before changing the class.
+ *
+ * @var string
+ */
+ private static $REGEX_ASCII = "[^\x09\x10\x13\x0A\x0D\x20-\x7E]";
+
+ /**
+ * Bidirectional text control chars: decimal Unicode code point => UTF-8 byte sequence.
+ *
+ * normalize_whitespace() removes these sequences unless "$keepBidiUnicodeControls" is true.
+ *
+ * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
+ *
+ * @var array<int, string>
+ */
+ private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
+ // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
+ 8234 => "\xE2\x80\xAA",
+ // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
+ 8235 => "\xE2\x80\xAB",
+ // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
+ 8236 => "\xE2\x80\xAC",
+ // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
+ 8237 => "\xE2\x80\xAD",
+ // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
+ 8238 => "\xE2\x80\xAE",
+ // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
+ 8294 => "\xE2\x81\xA6",
+ // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
+ 8295 => "\xE2\x81\xA7",
+ // FIRST STRONG ISOLATE // (use -> dir = "auto")
+ 8296 => "\xE2\x81\xA8",
+ // POP DIRECTIONAL ISOLATE
+ 8297 => "\xE2\x81\xA9",
+ ];
+
+ /**
+  * Collect every constant of this class as "lower-cased name => value".
+  *
+  * The "_LANGUAGE_CODE" suffix is stripped from the name, except for constants whose
+  * name contains "EXTRA": those keep their full (lower-cased) name.
+  * The list is built once via reflection and memoized in a static variable.
+  *
+  * @return string[]
+  *
+  * @psalm-return array<string, string>
+  */
+ public static function getAllLanguages(): array
+ {
+     /** @var array<string, string> $LANGUAGES */
+     static $LANGUAGES = [];
+
+     if ($LANGUAGES === []) {
+         $constants = (new \ReflectionClass(__CLASS__))->getConstants();
+
+         foreach ($constants as $name => $code) {
+             $key = \strpos($name, 'EXTRA') === false
+                 ? \str_replace('_LANGUAGE_CODE', '', $name)
+                 : $name;
+
+             $LANGUAGES[\strtolower($key)] = $code;
+         }
+     }
+
+     return $LANGUAGES;
+ }
+
+ /**
+  * Return the replacement maps of all languages, keyed by language code.
+  *
+  * EXAMPLE: <code>
+  * $array = ASCII::charsArray();
+  * var_dump($array['ru']['б']); // 'b'
+  * </code>
+  *
+  * @psalm-suppress InvalidNullableReturnType - we use the prepare* methods here, so we don't get NULL here
+  *
+  * @param bool $replace_extra_symbols [optional] <p>TRUE === use the maps that also contain the extra
+  *                                    symbols, e.g. "£" with " pound ".</p>
+  *
+  * @psalm-pure
+  *
+  * @return array
+  *
+  * @psalm-return array<string, array<string , string>>
+  */
+ public static function charsArray(bool $replace_extra_symbols = false): array
+ {
+     if (!$replace_extra_symbols) {
+         self::prepareAsciiMaps();
+
+         return self::$ASCII_MAPS ?? [];
+     }
+
+     self::prepareAsciiAndExtrasMaps();
+
+     return self::$ASCII_MAPS_AND_EXTRAS ?? [];
+ }
+
+ /**
+  * Return the inverted replacement table: ASCII replacement => list of source chars
+  * (merged over all languages).
+  *
+  * EXAMPLE: <code>
+  * $array = ASCII::charsArrayWithMultiLanguageValues();
+  * var_dump($array['b']); // ['β', 'б', 'ဗ', 'ბ', 'ب']
+  * </code>
+  *
+  * The result is memoized per "$replace_extra_symbols" value.
+  *
+  * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
+  *
+  * @psalm-pure
+  *
+  * @return array
+  *               <p>An array of replacements.</p>
+  *
+  * @psalm-return array<string, array<int, string>>
+  */
+ public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array
+ {
+     /**
+      * @var array<string, array<string, array<int, string>>>
+      */
+     static $CHARS_ARRAY = [];
+     $cacheKey = '' . $replace_extra_symbols;
+
+     if (isset($CHARS_ARRAY[$cacheKey])) {
+         return $CHARS_ARRAY[$cacheKey];
+     }
+
+     /** @var array<string, string> $language_all_chars */
+     $language_all_chars = self::charsArrayWithSingleLanguageValues(
+         $replace_extra_symbols,
+         false
+     );
+
+     // Iterate by value: nothing is modified in place, and a by-reference loop would
+     // force a full copy of the (shared, cached) source array.
+     $return = [];
+     foreach ($language_all_chars as $key => $value) {
+         $return[$value][] = $key;
+     }
+
+     $CHARS_ARRAY[$cacheKey] = $return;
+
+     return $return;
+ }
+
+ /**
+  * Return the replacement table of exactly one language.
+  *
+  * For example, German will map 'ä' to 'ae', while other languages
+  * will simply return e.g. 'a'.
+  *
+  * EXAMPLE: <code>
+  * $array = ASCII::charsArrayWithOneLanguage('ru');
+  * $tmpKey = \array_search('yo', $array['replace']);
+  * echo $array['orig'][$tmpKey]; // 'ё'
+  * </code>
+  *
+  * An unknown language yields an empty table (in the requested shape).
+  * Results are memoized per flag combination and language.
+  *
+  * @psalm-suppress InvalidNullableReturnType - we use the prepare* methods here, so we don't get NULL here
+  *
+  * @param string $language              [optional] <p>Language of the source string e.g.: en, de_at, or de-ch.
+  *                                      (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
+  * @param bool   $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
+  * @param bool   $asOrigReplaceArray    [optional] <p>TRUE === return {orig: string[], replace: string[]}
+  *                                      array</p>
+  *
+  * @psalm-pure
+  *
+  * @return array
+  *               <p>An array of replacements.</p>
+  *
+  * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
+  */
+ public static function charsArrayWithOneLanguage(
+     string $language = self::ENGLISH_LANGUAGE_CODE,
+     bool $replace_extra_symbols = false,
+     bool $asOrigReplaceArray = true
+ ): array {
+     $language = self::get_language($language);
+
+     /**
+      * @var array<string, array>
+      */
+     static $CHARS_ARRAY = [];
+     $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
+
+     // static cache hit
+     if (isset($CHARS_ARRAY[$cacheKey][$language])) {
+         return $CHARS_ARRAY[$cacheKey][$language];
+     }
+
+     // pick the source table; a language without a map falls back to an empty one
+     if ($replace_extra_symbols) {
+         self::prepareAsciiAndExtrasMaps();
+         $languageMap = self::$ASCII_MAPS_AND_EXTRAS[$language] ?? [];
+     } else {
+         self::prepareAsciiMaps();
+         $languageMap = self::$ASCII_MAPS[$language] ?? [];
+     }
+
+     if ($asOrigReplaceArray) {
+         $result = [
+             'orig'    => \array_keys($languageMap),
+             'replace' => \array_values($languageMap),
+         ];
+     } else {
+         $result = $languageMap;
+     }
+
+     $CHARS_ARRAY[$cacheKey][$language] = $result;
+
+     return $result;
+ }
+
+ /**
+  * Return one replacement table merged over all languages (later languages overwrite
+  * earlier ones for the same source char, as done by array_merge()).
+  *
+  * EXAMPLE: <code>
+  * $array = ASCII::charsArrayWithSingleLanguageValues();
+  * $tmpKey = \array_search('hnaik', $array['replace']);
+  * echo $array['orig'][$tmpKey]; // '၌'
+  * </code>
+  *
+  * Results are memoized per flag combination. If no maps are available at all,
+  * an empty table (in the requested shape) is returned.
+  *
+  * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
+  * @param bool $asOrigReplaceArray    [optional] <p>TRUE === return {orig: string[], replace: string[]}
+  *                                    array</p>
+  *
+  * @psalm-pure
+  *
+  * @return array
+  *               <p>An array of replacements.</p>
+  *
+  * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
+  */
+ public static function charsArrayWithSingleLanguageValues(
+     bool $replace_extra_symbols = false,
+     bool $asOrigReplaceArray = true
+ ): array {
+     /**
+      * @var array<string,array>
+      */
+     static $CHARS_ARRAY = [];
+     $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
+
+     if (isset($CHARS_ARRAY[$cacheKey])) {
+         return $CHARS_ARRAY[$cacheKey];
+     }
+
+     if ($replace_extra_symbols) {
+         self::prepareAsciiAndExtrasMaps();
+         $maps = self::$ASCII_MAPS_AND_EXTRAS ?? [];
+     } else {
+         self::prepareAsciiMaps();
+         $maps = self::$ASCII_MAPS ?? [];
+     }
+
+     // array_values(): string-keyed arrays cannot be unpacked on older PHP versions;
+     // with an empty list this degrades to array_merge([]) === [].
+     $merged = \array_merge([], ...\array_values($maps));
+
+     if ($asOrigReplaceArray) {
+         $CHARS_ARRAY[$cacheKey] = [
+             'orig'    => \array_keys($merged),
+             'replace' => \array_values($merged),
+         ];
+     } else {
+         $CHARS_ARRAY[$cacheKey] = $merged;
+     }
+
+     return $CHARS_ARRAY[$cacheKey];
+ }
+
+ /**
+ * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
+ *
+ * Bytes that are not part of a UTF-8 shaped sequence (lead byte + continuation bytes) are dropped;
+ * afterwards the optional steps run in this order: normalize_whitespace(), normalize_msword(),
+ * remove_invisible_characters().
+ *
+ * @param string $str <p>The string to be sanitized.</p>
+ * @param bool $normalize_whitespace [optional] <p>Set to true, if you need to normalize the
+ * whitespace.</p>
+ * @param bool $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in
+ * combination with $normalize_whitespace</p>
+ * @param bool $normalize_msword [optional] <p>Set to true, if you need to normalize MS Word chars
+ * e.g.: "…" => "..."</p>
+ * @param bool $remove_invisible_characters [optional] <p>Set to false, if you do not want to remove
+ * invisible characters e.g.: "\0"</p>
+ *
+ * @psalm-pure
+ *
+ * @return string
+ * <p>A clean UTF-8 string.</p>
+ */
+ public static function clean(
+ string $str,
+ bool $normalize_whitespace = true,
+ bool $keep_non_breaking_space = false,
+ bool $normalize_msword = true,
+ bool $remove_invisible_characters = true
+ ): string {
+ // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
+ // caused connection reset problem on larger strings
+
+ $regex = '/
+ (
+ (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
+ | [\xC0-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
+ | [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences 1110xxxx 10xxxxxx * 2
+ | [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
+ ){1,100} # ...one or more times
+ )
+ | ( [\x80-\xBF] ) # invalid byte in range 10000000 - 10111111
+ | ( [\xC0-\xFF] ) # invalid byte in range 11000000 - 11111111
+ /x';
+ $str = (string) \preg_replace($regex, '$1', $str);
+
+ if ($normalize_whitespace) {
+ $str = self::normalize_whitespace($str, $keep_non_breaking_space);
+ }
+
+ if ($normalize_msword) {
+ $str = self::normalize_msword($str);
+ }
+
+ if ($remove_invisible_characters) {
+ $str = self::remove_invisible_characters($str);
+ }
+
+ return $str;
+ }
+
+ /**
+  * Checks if a string is 7 bit ASCII, i.e. contains no char matched by self::$REGEX_ASCII.
+  *
+  * EXAMPLE: <code>
+  * ASCII::is_ascii('白'); // false
+  * </code>
+  *
+  * @param string $str <p>The string to check.</p>
+  *
+  * @psalm-pure
+  *
+  * @return bool
+  *              <p>
+  *              <strong>true</strong> if it is ASCII (an empty string counts as ASCII)<br>
+  *              <strong>false</strong> otherwise
+  *              </p>
+  */
+ public static function is_ascii(string $str): bool
+ {
+     // the empty string short-circuits, so the regex only runs on real input
+     return $str === '' || !\preg_match('/' . self::$REGEX_ASCII . '/', $str);
+ }
+
+ /**
+  * Replace smart quotes, ellipsis characters and dashes from Windows-1252
+  * (commonly used in Word documents) with their ASCII equivalents.
+  *
+  * EXAMPLE: <code>
+  * ASCII::normalize_msword('„Abcdef…”'); // '"Abcdef..."'
+  * </code>
+  *
+  * The search / replace lists come from the "msword" map and are memoized
+  * in a static variable after the first non-empty call.
+  *
+  * @param string $str <p>The string to be normalized.</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
+  */
+ public static function normalize_msword(string $str): string
+ {
+     if ($str === '') {
+         return '';
+     }
+
+     /**
+      * @var array{orig: string[], replace: string[]}
+      */
+     static $MSWORD_CACHE = ['orig' => [], 'replace' => []];
+
+     if ($MSWORD_CACHE['orig'] === []) {
+         self::prepareAsciiMaps();
+
+         /**
+          * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here
+          *
+          * @var array<string, string>
+          */
+         $mswordMap = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? [];
+
+         $MSWORD_CACHE['orig'] = \array_keys($mswordMap);
+         $MSWORD_CACHE['replace'] = \array_values($mswordMap);
+     }
+
+     return \str_replace($MSWORD_CACHE['orig'], $MSWORD_CACHE['replace'], $str);
+ }
+
+ /**
+  * Normalize the whitespace.
+  *
+  * EXAMPLE: <code>
+  * ASCII::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"
+  * </code>
+  *
+  * Steps, in order: optional control character normalization, optional removal of the
+  * bidirectional control chars, then every char of the "whitespace" map becomes a plain space.
+  *
+  * @param string $str                          <p>The string to be normalized.</p>
+  * @param bool   $keepNonBreakingSpace         [optional] <p>Set to true, to keep non-breaking-spaces.</p>
+  * @param bool   $keepBidiUnicodeControls      [optional] <p>Set to true, to keep non-printable (for the web)
+  *                                             bidirectional text chars.</p>
+  * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert CR+LF, CR, FORM FEED,
+  *                                             LINE- and PARAGRAPH-SEPARATOR to "\n" and
+  *                                             LINE TABULATION to "\t".</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  *                <p>A string with normalized whitespace.</p>
+  */
+ public static function normalize_whitespace(
+     string $str,
+     bool $keepNonBreakingSpace = false,
+     bool $keepBidiUnicodeControls = false,
+     bool $normalize_control_characters = false
+ ): string {
+     if ($str === '') {
+         return '';
+     }
+
+     /**
+      * @var array<int,array<int,string>>
+      */
+     static $WHITESPACE_CACHE = [];
+     $cacheKey = (int) $keepNonBreakingSpace;
+
+     if ($normalize_control_characters) {
+         // str_replace() works through the list in order, so the two-byte
+         // sequences must come before the single "\x0d" / "\x0c" entries.
+         $str = \str_replace(
+             [
+                 "\x0d\x0a",     // 'END OF LINE' (CR + LF) -> one "\n", not two
+                 "\x0d\x0c",     // CR + FORM FEED (kept from the previous list)
+                 "\xe2\x80\xa8", // 'LINE SEPARATOR'
+                 "\xe2\x80\xa9", // 'PARAGRAPH SEPARATOR'
+                 "\x0c",         // 'FORM FEED'
+                 "\x0d",         // 'CARRIAGE RETURN'
+                 "\x0b",         // 'VERTICAL TAB'
+             ],
+             [
+                 "\n",
+                 "\n",
+                 "\n",
+                 "\n",
+                 "\n",
+                 "\n",
+                 "\t",
+             ],
+             $str
+         );
+     }
+
+     if (!isset($WHITESPACE_CACHE[$cacheKey])) {
+         self::prepareAsciiMaps();
+
+         $whitespaceMap = self::$ASCII_MAPS[self::EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE] ?? [];
+
+         if ($keepNonBreakingSpace) {
+             unset($whitespaceMap["\xc2\xa0"]);
+         }
+
+         // only the source chars are needed, all of them are replaced by ' '
+         $WHITESPACE_CACHE[$cacheKey] = \array_keys($whitespaceMap);
+     }
+
+     if (!$keepBidiUnicodeControls) {
+         $str = \str_replace(self::$BIDI_UNI_CODE_CONTROLS_TABLE, '', $str);
+     }
+
+     return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
+ }
+
+ /**
+  * Remove invisible characters from a string.
+  *
+  * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
+  *
+  * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
+  *
+  * @param string $str                           <p>The input string.</p>
+  * @param bool   $url_encoded                   [optional] <p>TRUE === also remove the url encoded form
+  *                                              ("%00" - "%08", "%0B", "%0C", "%0E" - "%1F", "%7F").</p>
+  * @param string $replacement                   [optional] <p>The string every match is replaced with.</p>
+  * @param bool   $keep_basic_control_characters [optional] <p>TRUE === only remove the control bytes
+  *                                              00-08, 11, 12, 14-31 and 127. FALSE === normalize
+  *                                              the control characters via normalize_whitespace()
+  *                                              first and then remove every non-whitespace char of
+  *                                              the unicode category "C".</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  */
+ public static function remove_invisible_characters(
+     string $str,
+     bool $url_encoded = false,
+     string $replacement = '',
+     bool $keep_basic_control_characters = true
+ ): string {
+     $non_displayables = [];
+
+     // every control character except:
+     // - newline (dec 10),
+     // - carriage return (dec 13),
+     // - horizontal tab (dec 09)
+     if ($url_encoded) {
+         $non_displayables[] = '/%0[0-8bcefBCEF]/'; // url encoded 00-08, 11, 12, 14, 15
+         $non_displayables[] = '/%1[0-9a-fA-F]/'; // url encoded 16-31
+         $non_displayables[] = '/%7[fF]/'; // url encoded 127 (the raw byte is removed below as well)
+     }
+
+     if ($keep_basic_control_characters) {
+         $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127
+     } else {
+         $str = self::normalize_whitespace($str, false, false, true);
+         $non_displayables[] = '/[^\P{C}\s]/u';
+     }
+
+     // Repeat until nothing matches anymore: removing a match can form a new one (e.g. "%0%000").
+     // Stop as well when a pass leaves the string unchanged (a $replacement that matches itself),
+     // which would otherwise loop forever.
+     do {
+         $previous = $str;
+         $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
+     } while ($count !== 0 && $str !== $previous);
+
+     return $str;
+ }
+
+ /**
+  * Returns an ASCII version of the string. A set of non-ASCII characters are
+  * replaced with their closest ASCII counterparts, and the rest are removed
+  * by default. The language or locale of the source string can be supplied
+  * for language-specific transliteration in any of the following formats:
+  * en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping
+  * to "aeoeue" rather than "aou" as in other languages.
+  *
+  * EXAMPLE: <code>
+  * ASCII::to_ascii('Düsseldorf', 'en'); // Dusseldorf
+  * </code>
+  *
+  * How it works: every char matched by self::$REGEX_ASCII (plus the extra symbols, if requested)
+  * is collected; then sequences of 5, 4, 3 and 2 consecutive matches and finally the single
+  * matches are looked up in the merged replacement table (language specific entries win).
+  *
+  * @param string    $str                       <p>The input string.</p>
+  * @param string    $language                  [optional] <p>Language of the source string.
+  *                                             (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
+  * @param bool      $remove_unsupported_chars  [optional] <p>Whether or not to remove the
+  *                                             unsupported characters.</p>
+  * @param bool      $replace_extra_symbols     [optional] <p>Add some more replacements e.g. "£" with " pound
+  *                                             ".</p>
+  * @param bool      $use_transliterate         [optional] <p>Use ASCII::to_transliterate() for unknown chars.
+  *                                             Always enabled for languages without an own map.</p>
+  * @param bool|null $replace_single_chars_only [optional] <p>Single char replacement is better for the
+  *                                             performance, but some languages need to replace more than one
+  *                                             char at the same time. | NULL === auto-setting, depended on the
+  *                                             language</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  *                <p>A string that contains only ASCII characters.</p>
+  */
+ public static function to_ascii(
+     string $str,
+     string $language = self::ENGLISH_LANGUAGE_CODE,
+     bool $remove_unsupported_chars = true,
+     bool $replace_extra_symbols = false,
+     bool $use_transliterate = false,
+     bool $replace_single_chars_only = null
+ ): string {
+     if ($str === '') {
+         return '';
+     }
+
+     $language = self::get_language($language);
+
+     /**
+      * @var string|null regex-quoted list of all extra symbols, built on first use
+      */
+     static $EXTRA_SYMBOLS_CACHE = null;
+
+     /**
+      * @var array<string,array<string,string>>
+      */
+     static $REPLACE_HELPER_CACHE = [];
+     $cacheKey = $language . '-' . $replace_extra_symbols;
+
+     if (!isset($REPLACE_HELPER_CACHE[$cacheKey])) {
+         $langAll = self::charsArrayWithSingleLanguageValues($replace_extra_symbols, false);
+
+         $langSpecific = self::charsArrayWithOneLanguage($language, $replace_extra_symbols, false);
+
+         if ($langSpecific === []) {
+             $REPLACE_HELPER_CACHE[$cacheKey] = $langAll;
+         } else {
+             // later arrays win: the language specific replacements overwrite the generic ones
+             $REPLACE_HELPER_CACHE[$cacheKey] = \array_merge([], $langAll, $langSpecific);
+         }
+     }
+
+     if (
+         $replace_extra_symbols
+         &&
+         $EXTRA_SYMBOLS_CACHE === null
+     ) {
+         $extraSymbols = [];
+         foreach (self::$ASCII_EXTRAS ?? [] as $extrasDataTmp) {
+             foreach ($extrasDataTmp as $extrasDataKeyTmp => $extrasDataValueTmp) {
+                 // keyed by the symbol itself, so every symbol is listed only once
+                 $extraSymbols[$extrasDataKeyTmp] = $extrasDataKeyTmp;
+             }
+         }
+
+         // The symbols end up inside a regex character class, so chars like "-", "]", "^"
+         // or "\" must be escaped to be matched literally.
+         $EXTRA_SYMBOLS_CACHE = \preg_quote(\implode('', $extraSymbols), '/');
+     }
+
+     $pattern = '/' . self::$REGEX_ASCII;
+     if ($replace_extra_symbols && $EXTRA_SYMBOLS_CACHE !== '') {
+         // an empty class "[]" would be an invalid pattern, so it is only added when needed
+         $pattern .= '|[' . $EXTRA_SYMBOLS_CACHE . ']';
+     }
+     $pattern .= '/u';
+
+     if (\preg_match_all($pattern, $str, $matches)) {
+         $replaceTable = $REPLACE_HELPER_CACHE[$cacheKey];
+         $charDone = [];
+
+         if (!$replace_single_chars_only) {
+             if (self::$LANGUAGE_MAX_KEY === null) {
+                 self::$LANGUAGE_MAX_KEY = self::getData('ascii_language_max_key');
+             }
+
+             $maxKeyLength = self::$LANGUAGE_MAX_KEY[$language] ?? 0;
+
+             // longest sequences first, so that a long key is not broken up by a shorter one
+             foreach ([5, 4, 3, 2] as $sequenceLength) {
+                 // sequences of 4 and 5 chars are only tried if the language has keys of that length
+                 if ($sequenceLength >= 4 && $maxKeyLength < $sequenceLength) {
+                     continue;
+                 }
+
+                 foreach ($matches[0] as $keyTmp => $char) {
+                     if (!isset($matches[0][$keyTmp + $sequenceLength - 1])) {
+                         // $matches[0] is a list: no later position can have enough followers either
+                         break;
+                     }
+
+                     $sequence = \implode('', \array_slice($matches[0], $keyTmp, $sequenceLength));
+
+                     if (
+                         !isset($charDone[$sequence])
+                         &&
+                         isset($replaceTable[$sequence])
+                         &&
+                         \strpos($str, $sequence) !== false
+                     ) {
+                         $charDone[$sequence] = true;
+                         $str = \str_replace($sequence, $replaceTable[$sequence], $str);
+                     }
+                 }
+             }
+         }
+
+         foreach ($matches[0] as $char) {
+             if (
+                 !isset($charDone[$char])
+                 &&
+                 isset($replaceTable[$char])
+                 &&
+                 \strpos($str, $char) !== false
+             ) {
+                 $charDone[$char] = true;
+                 $str = \str_replace($char, $replaceTable[$char], $str);
+             }
+         }
+     }
+
+     /** @psalm-suppress PossiblyNullOperand - we use the prepare* methods here, so we don't get NULL here */
+     if (!isset(self::$ASCII_MAPS[$language])) {
+         // no own map for this language: fall back to the generic transliteration
+         $use_transliterate = true;
+     }
+
+     if ($use_transliterate) {
+         /** @noinspection ArgumentEqualsDefaultValueInspection */
+         $str = self::to_transliterate($str, null, false);
+     }
+
+     if ($remove_unsupported_chars) {
+         $str = (string) \str_replace(["\n\r", "\n", "\r", "\t"], ' ', $str);
+         $str = (string) \preg_replace('/' . self::$REGEX_ASCII . '/', '', $str);
+     }
+
+     return $str;
+ }
+
+ /**
+  * Convert given string to safe filename (and keep string case).
+  *
+  * EXAMPLE: <code>
+  * ASCII::to_filename('שדגשדג.png', true); // 'shdgshdg.png'
+  * </code>
+  *
+  * Only "a-z", "A-Z", "0-9", ".", "-", whitespace and the chars of $fallback_char survive;
+  * whitespace runs become $fallback_char, repeated fallback chars are collapsed and
+  * leading / trailing fallback chars are trimmed.
+  *
+  * @param string $str               <p>The input string.</p>
+  * @param bool   $use_transliterate <p>ASCII::to_transliterate() is used by default - unsafe characters are
+  *                                  simply removed otherwise.</p>
+  * @param string $fallback_char     <p>Replacement for whitespace (and for unknown chars while
+  *                                  transliterating). An empty string removes them instead.</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  *                <p>A string that contains only safe characters for a filename.</p>
+  */
+ public static function to_filename(
+     string $str,
+     bool $use_transliterate = true,
+     string $fallback_char = '-'
+ ): string {
+     if ($use_transliterate) {
+         $str = self::to_transliterate($str, $fallback_char);
+     }
+
+     $fallback_char_escaped = \preg_quote($fallback_char, '/');
+
+     $patterns = [
+         '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
+         '/[\\s]+/u', // 2) convert spaces to $fallback_char
+     ];
+     $replacements = [
+         '',
+         $fallback_char,
+     ];
+
+     // An empty $fallback_char would produce the invalid pattern "/[]+/u" and preg_replace()
+     // would then fail for the whole call, so this step is skipped in that case.
+     if ($fallback_char !== '') {
+         $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
+         $replacements[] = $fallback_char;
+     }
+
+     $str = (string) \preg_replace($patterns, $replacements, $str);
+
+     return \trim($str, $fallback_char);
+ }
+
+ /**
+  * Converts the string into an URL slug. This includes replacing non-ASCII
+  * characters with their closest ASCII equivalents, removing remaining
+  * non-ASCII and non-alphanumeric characters, and replacing whitespace with
+  * $separator. The separator defaults to a single dash, and the string
+  * is also converted to lowercase. The language of the source string can
+  * also be supplied for language-specific transliteration.
+  *
+  * @param string                $str                   <p>The input string.</p>
+  * @param string                $separator             [optional] <p>The string used to replace whitespace.</p>
+  * @param string                $language              [optional] <p>Language of the source string.
+  *                                                     (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
+  * @param array<string, string> $replacements          [optional] <p>A map of replaceable strings, applied
+  *                                                     before anything else.</p>
+  * @param bool                  $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with "
+  *                                                     pound ".</p>
+  * @param bool                  $use_str_to_lower      [optional] <p>Use "string to lower" for the input.</p>
+  * @param bool                  $use_transliterate     [optional] <p>Use ASCII::to_transliterate() for unknown
+  *                                                     chars.</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  *                <p>A string that has been converted to an URL slug.</p>
+  */
+ public static function to_slugify(
+     string $str,
+     string $separator = '-',
+     string $language = self::ENGLISH_LANGUAGE_CODE,
+     array $replacements = [],
+     bool $replace_extra_symbols = false,
+     bool $use_str_to_lower = true,
+     bool $use_transliterate = false
+ ): string {
+     if ($str === '') {
+         return '';
+     }
+
+     // 1) caller supplied replacements, one after the other
+     foreach ($replacements as $search => $replace) {
+         $str = \str_replace($search, $replace, $str);
+     }
+
+     // 2) transliterate, but keep the unsupported chars for now (they are removed in step 3)
+     $str = self::to_ascii(
+         $str,
+         $language,
+         false,
+         $replace_extra_symbols,
+         $use_transliterate
+     );
+
+     $str = \str_replace('@', $separator, $str);
+
+     // 3) drop everything except letters, digits, whitespace, "-", "_" and the separator
+     $disallowedChars = '/[^a-zA-Z\\d\\s\\-_' . \preg_quote($separator, '/') . ']/';
+     $str = (string) \preg_replace($disallowedChars, '', $str);
+
+     if ($use_str_to_lower) {
+         $str = \strtolower($str);
+     }
+
+     // 4) trim quotes / whitespace, split at inner upper case letters, collapse "-", "_" and whitespace
+     $str = (string) \preg_replace(
+         [
+             '/^[\'\\s]+|[\'\\s]+$/',
+             '/\\B([A-Z])/',
+             '/[\\-_\\s]+/',
+         ],
+         [
+             '',
+             '-\1',
+             $separator,
+         ],
+         $str
+     );
+
+     // 5) remove one leading and one trailing separator
+     $separatorLength = \strlen($separator);
+
+     if ($separatorLength && \strpos($str, $separator) === 0) {
+         $str = (string) \substr($str, $separatorLength);
+     }
+
+     if (\substr($str, -$separatorLength) === $separator) {
+         $str = (string) \substr($str, 0, \strlen($str) - $separatorLength);
+     }
+
+     return $str;
+ }
+
+ /**
+ * Returns an ASCII version of the string. Non-ASCII characters that have an
+ * entry in the transliteration tables are replaced with that entry; all other
+ * non-ASCII characters are replaced with $unknown, or kept as-is when
+ * $unknown is NULL.
+ *
+ * EXAMPLE: <code>
+ * ASCII::to_transliterate('déjà σσς iıii'); // 'deja sss iiii'
+ * </code>
+ *
+ * @param string $str <p>The input string.</p>
+ * @param string|null $unknown [optional] <p>Character use if character unknown. (default is '?')
+ * But you can also use NULL to keep the unknown chars.</p>
+ * @param bool $strict [optional] <p>Use "transliterator_transliterate()" from PHP-Intl
+ * first (only applied when the "intl" extension is loaded).</p>
+ *
+ * @psalm-pure
+ *
+ * @return string
+ * <p>A string that contains only ASCII characters, unless $unknown is NULL
+ * and unmapped characters were therefore kept.</p>
+ *
+ * @noinspection ParameterDefaultValueIsNotNullInspection
+ */
+ public static function to_transliterate(
+ string $str,
+ $unknown = '?',
+ bool $strict = false
+ ): string {
+ /**
+ * Per-call-site cache of the loaded transliteration tables, keyed by "bank"
+ * (the code point shifted right by 8 bits).
+ *
+ * @var array<int,string>|null
+ */
+ static $UTF8_TO_TRANSLIT = null;
+
+ /**
+ * Lazily created transliterator, only used in strict mode.
+ *
+ * @var \Transliterator|null
+ */
+ static $TRANSLITERATOR = null;
+
+ /**
+ * Cached result of the "intl" extension check.
+ *
+ * @var bool|null
+ */
+ static $SUPPORT_INTL = null;
+
+ if ($str === '') {
+ return '';
+ }
+
+ if ($SUPPORT_INTL === null) {
+ $SUPPORT_INTL = \extension_loaded('intl');
+ }
+
+ // check if we only have ASCII, first (better performance)
+ $str_tmp = $str;
+ if (self::is_ascii($str)) {
+ return $str;
+ }
+
+ // NOTE(review): clean() is defined outside this chunk; presumably it normalizes
+ // the input (e.g. removes broken bytes) - confirm against its implementation.
+ $str = self::clean($str);
+
+ // check again, if we only have ASCII, now ...
+ if (
+ $str_tmp !== $str
+ &&
+ self::is_ascii($str)
+ ) {
+ return $str;
+ }
+
+ // strict mode: let PHP-Intl transliterate first, if the extension is available
+ if (
+ $strict
+ &&
+ $SUPPORT_INTL === true
+ ) {
+ if (!isset($TRANSLITERATOR)) {
+ // INFO: see "*-Latin" rules via "transliterator_list_ids()"
+ /**
+ * @var \Transliterator
+ */
+ $TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;');
+ }
+
+ // INFO: https://unicode.org/cldr/utility/character.jsp
+ $str_tmp = \transliterator_transliterate($TRANSLITERATOR, $str);
+
+ if ($str_tmp !== false) {
+
+ // check again, if we only have ASCII, now ...
+ if (
+ $str_tmp !== $str
+ &&
+ self::is_ascii($str_tmp)
+ ) {
+ return $str_tmp;
+ }
+
+ // still non-ASCII: continue with the table lookup below, based on the Intl result
+ $str = $str_tmp;
+ }
+ }
+
+ // lazy-load the lookup table that is indexed by a single byte below
+ if (self::$ORD === null) {
+ self::$ORD = self::getData('ascii_ord');
+ }
+
+ // split the string into single characters (UTF-8 mode, "." also matches newlines)
+ \preg_match_all('/.|[^\x00]$/us', $str, $array_tmp);
+ $chars = $array_tmp[0];
+ // NOTE(review): $ord is initialized once and not reset per iteration, so a lead byte
+ // in 128..191 or 248..253 would reuse the value of the previous character; presumably
+ // unreachable because the "/u" regex only yields valid UTF-8 - confirm.
+ $ord = null;
+ $str_tmp = '';
+ // NOTE(review): $c is taken by reference and is not unset after the loop; $chars is
+ // local and not used afterwards in this method.
+ foreach ($chars as &$c) {
+ $ordC0 = self::$ORD[$c[0]];
+
+ // ASCII - next please
+ if ($ordC0 >= 0 && $ordC0 <= 127) {
+ $str_tmp .= $c;
+
+ continue;
+ }
+
+ $ordC1 = self::$ORD[$c[1]];
+
+ // 2-byte sequence (lead byte 192..223)
+ if ($ordC0 >= 192 && $ordC0 <= 223) {
+ $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
+ }
+
+ if ($ordC0 >= 224) {
+ $ordC2 = self::$ORD[$c[2]];
+
+ // 3-byte sequence (lead byte 224..239)
+ if ($ordC0 <= 239) {
+ $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
+ }
+
+ if ($ordC0 >= 240) {
+ $ordC3 = self::$ORD[$c[3]];
+
+ // 4-byte sequence (lead byte 240..247)
+ if ($ordC0 <= 247) {
+ $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
+ }
+
+ // We only process valid UTF-8 chars (<= 4 byte), so we don't need this code here ...
+ /*
+ if ($ordC0 >= 248) {
+ $ordC4 = self::$ORD[$c[4]];
+
+ if ($ordC0 <= 251) {
+ $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
+ }
+
+ if ($ordC0 >= 252) {
+ $ordC5 = self::$ORD[$c[5]];
+
+ if ($ordC0 <= 253) {
+ $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
+ }
+ }
+ }
+ */
+ }
+ }
+
+ // lead bytes 254 / 255 or no decoded code point: treat the character as unknown
+ if (
+ $ordC0 === 254
+ ||
+ $ordC0 === 255
+ ||
+ $ord === null
+ ) {
+ $str_tmp .= $unknown ?? $c;
+
+ continue;
+ }
+
+ // the tables are split into "banks" of 256 code points, one data file ("x" + 3 hex digits) per bank
+ $bank = $ord >> 8;
+ if (!isset($UTF8_TO_TRANSLIT[$bank])) {
+ $UTF8_TO_TRANSLIT[$bank] = self::getDataIfExists(\sprintf('x%03x', $bank));
+ }
+
+ // index of the code point inside its bank (low 8 bits)
+ $new_char = $ord & 255;
+
+ if (isset($UTF8_TO_TRANSLIT[$bank][$new_char])) {
+
+ // keep for debugging
+ /*
+ echo "file: " . sprintf('x%02x', $bank) . "\n";
+ echo "char: " . $c . "\n";
+ echo "ord: " . $ord . "\n";
+ echo "new_char: " . $new_char . "\n";
+ echo "new_char: " . mb_chr($new_char) . "\n";
+ echo "ascii: " . $UTF8_TO_TRANSLIT[$bank][$new_char] . "\n";
+ echo "bank:" . $bank . "\n\n";
+ */
+
+ $new_char = $UTF8_TO_TRANSLIT[$bank][$new_char];
+
+ /** @noinspection MissingOrEmptyGroupStatementInspection */
+ /** @noinspection PhpStatementHasEmptyBodyInspection */
+ if ($unknown === null && $new_char === '') {
+ // nothing: "keep unknown chars" mode and an empty table entry, so the original char stays
+ } elseif (
+ $new_char === '[?]'
+ ||
+ $new_char === '[?] '
+ ) {
+ // the table marks this code point with a "[?]" placeholder: handle it like an unknown char
+ $c = $unknown ?? $c;
+ } else {
+ $c = $new_char;
+ }
+ } else {
+
+ // keep for debugging missing chars
+ /*
+ echo "file: " . sprintf('x%02x', $bank) . "\n";
+ echo "char: " . $c . "\n";
+ echo "ord: " . $ord . "\n";
+ echo "new_char: " . $new_char . "\n";
+ echo "new_char: " . mb_chr($new_char) . "\n";
+ echo "bank:" . $bank . "\n\n";
+ */
+
+ // no table entry for this code point
+ $c = $unknown ?? $c;
+ }
+
+ $str_tmp .= $c;
+ }
+
+ return $str_tmp;
+ }
+
+ /**
+  * Get the normalized (lower-case, "_"-separated) language from a string.
+  *
+  * If the language segment and the following segment are identical, the
+  * duplicate is dropped; other region / script suffixes are preserved.
+  *
+  * e.g.: de_at   -> de_at
+  *       de_DE   -> de
+  *       DE_DE   -> de
+  *       de-de   -> de
+  *       en_NZ   -> en_nz
+  *       zh-Hans -> zh_hans
+  *
+  * @noinspection ReturnTypeCanBeDeclaredInspection
+  *
+  * @param string $language <p>Language code, e.g. "de", "de_AT" or "de-de".</p>
+  *
+  * @psalm-pure
+  *
+  * @return string
+  */
+ private static function get_language(string $language)
+ {
+     if ($language === '') {
+         return '';
+     }
+
+     if (
+         \strpos($language, '_') === false
+         &&
+         \strpos($language, '-') === false
+     ) {
+         return \strtolower($language);
+     }
+
+     $language = \str_replace('-', '_', \strtolower($language));
+
+     // Collapse "xx_xx" into "xx", but only when both sides are complete segments.
+     // Without the lookarounds the pattern also matches partial segments, so that
+     // e.g. "en_nz" became "enz" ("n_n" -> "n") and "zh_hans" became "zhans".
+     $regex = '/(?<![a-z])(?<first>[a-z]+)_\g{first}(?![a-z])/';
+
+     return (string) \preg_replace($regex, '$1', $language);
+ }
+
+ /**
+  * Include "<this directory>/data/<$file>.php" and hand back whatever that
+  * file returns. No existence check is done before the include.
+  *
+  * @noinspection ReturnTypeCanBeDeclaredInspection
+  *
+  * @param string $file <p>File name inside the "data" directory, without the ".php" extension.</p>
+  *
+  * @psalm-pure
+  *
+  * @return array<mixed>
+  */
+ private static function getData(string $file)
+ {
+     $path = __DIR__ . '/data/' . $file . '.php';
+
+     /** @noinspection PhpIncludeInspection */
+     /** @noinspection UsingInclusionReturnValueInspection */
+     /** @psalm-suppress UnresolvableInclude */
+     return include $path;
+ }
+
+ /**
+  * Include "<this directory>/data/<$file>.php" if it is an existing file and
+  * return its result; otherwise return an empty array.
+  *
+  * @param string $file <p>File name inside the "data" directory, without the ".php" extension.</p>
+  *
+  * @psalm-pure
+  *
+  * @return array<mixed>
+  */
+ private static function getDataIfExists(string $file): array
+ {
+     $path = __DIR__ . '/data/' . $file . '.php';
+
+     // missing data file: nothing to load
+     /** @psalm-suppress ImpureFunctionCall */
+     if (!\is_file($path)) {
+         return [];
+     }
+
+     /** @noinspection PhpIncludeInspection */
+     /** @noinspection UsingInclusionReturnValueInspection */
+     /** @psalm-suppress UnresolvableInclude */
+     return include $path;
+ }
+
+ /**
+  * Build the combined "maps + extras" table once and cache it in
+  * self::$ASCII_MAPS_AND_EXTRAS; later calls return immediately.
+  *
+  * @psalm-pure
+  *
+  * @return void
+  */
+ private static function prepareAsciiAndExtrasMaps()
+ {
+     // already built
+     if (self::$ASCII_MAPS_AND_EXTRAS !== null) {
+         return;
+     }
+
+     self::prepareAsciiMaps();
+     self::prepareAsciiExtras();
+
+     /** @psalm-suppress PossiblyNullArgument - we use the prepare* methods here, so we don't get NULL here */
+     $maps = self::$ASCII_MAPS ?? [];
+     $extras = self::$ASCII_EXTRAS ?? [];
+
+     self::$ASCII_MAPS_AND_EXTRAS = \array_merge_recursive($maps, $extras);
+ }
+
+ /**
+  * Load the "ascii_by_languages" data file into self::$ASCII_MAPS,
+  * unless it has been loaded before.
+  *
+  * @psalm-pure
+  *
+  * @return void
+  */
+ private static function prepareAsciiMaps()
+ {
+     // already loaded
+     if (self::$ASCII_MAPS !== null) {
+         return;
+     }
+
+     self::$ASCII_MAPS = self::getData('ascii_by_languages');
+ }
+
+ /**
+  * Load the "ascii_extras_by_languages" data file into self::$ASCII_EXTRAS,
+  * unless it has been loaded before.
+  *
+  * @psalm-pure
+  *
+  * @return void
+  */
+ private static function prepareAsciiExtras()
+ {
+     // already loaded
+     if (self::$ASCII_EXTRAS !== null) {
+         return;
+     }
+
+     self::$ASCII_EXTRAS = self::getData('ascii_extras_by_languages');
+ }
+}