diff options
Diffstat (limited to 'vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php')
-rw-r--r-- | vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php | 2929 |
1 files changed, 2929 insertions, 0 deletions
diff --git a/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php new file mode 100644 index 000000000..d51f557a1 --- /dev/null +++ b/vendor/voku/portable-ascii/src/voku/helper/data/ascii_by_languages.php @@ -0,0 +1,2929 @@ +<?php + +// https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes + +return [ + // Dutch (Flemish) + 'nl' => [ + 'Á' => 'A', + 'á' => 'a', + 'Ä' => 'A', + 'ä' => 'a', + 'À' => 'A', + 'à' => 'a', + 'Â' => 'A', + 'â' => 'a', + 'É' => 'E', + 'é' => 'e', + 'Ë' => 'E', + 'ë' => 'e', + 'È' => 'E', + 'è' => 'e', + 'Ê' => 'E', + 'ê' => 'e', + 'Í' => 'I', + 'í' => 'i', + 'Ï' => 'I', + 'ï' => 'i', + 'Ì' => 'I', + 'ì' => 'i', + 'Î' => 'I', + 'î' => 'i', + 'Ó' => 'O', + 'ó' => 'o', + 'Ö' => 'O', + 'ö' => 'o', + 'Ò' => 'O', + 'ò' => 'o', + 'Ô' => 'O', + 'ô' => 'o', + 'Ú' => 'U', + 'ú' => 'u', + 'Ü' => 'U', + 'ü' => 'u', + 'Ù' => 'U', + 'ù' => 'u', + 'Û' => 'U', + 'û' => 'u', + 'Ý' => 'Y', + 'ý' => 'y', + 'Ÿ' => 'Y', + ], + // Italian + 'it' => [ + 'à' => 'a', + 'À' => 'A', + 'é' => 'e', + 'É' => 'E', + 'è' => 'e', + 'È' => 'E', + 'ì' => 'i', + 'Ì' => 'I', + 'Ò' => 'O', + 'ò' => 'o', + 'ù' => 'u', + 'Ù' => 'U', + ], + // Macedonian + 'mk' => [ + 'А' => 'A', + 'Б' => 'B', + 'В' => 'V', + 'Г' => 'G', + 'Д' => 'D', + 'Ѓ' => 'Gj', + 'Е' => 'E', + 'Ж' => 'Zh', + 'З' => 'Z', + 'Ѕ' => 'Dz', + 'И' => 'I', + 'Ј' => 'J', + 'К' => 'K', + 'Л' => 'L', + 'Љ' => 'Lj', + 'М' => 'M', + 'Н' => 'N', + 'Њ' => 'Nj', + 'О' => 'O', + 'П' => 'P', + 'Р' => 'R', + 'С' => 'S', + 'Т' => 'T', + 'Ќ' => 'Kj', + 'У' => 'U', + 'Ф' => 'F', + 'Х' => 'H', + 'Ц' => 'C', + 'Ч' => 'Ch', + 'Џ' => 'Dj', + 'Ш' => 'Sh', + 'а' => 'a', + 'б' => 'b', + 'в' => 'v', + 'г' => 'g', + 'д' => 'd', + 'ѓ' => 'gj', + 'е' => 'e', + 'ж' => 'zh', + 'з' => 'z', + 'ѕ' => 'dz', + 'и' => 'i', + 'ј' => 'j', + 'к' => 'k', + 'л' => 'l', + 'љ' => 'lj', + 'м' => 'm', + 'н' => 'n', + 'њ' => 'nj', + 'о' => 'o', + 'п' => 'p', + 'р' => 'r', + 'с' => 's', + 'т' => 't', + 'ќ' => 'kj', + 'у' => 'u', + 'ф' => 'f', + 'х' => 'h', + 'ц' => 'c', + 'ч' => 'ch', + 'џ' => 'dj', + 'ш' => 'sh', + ], + // Portuguese (Brazil) + 'pt' => [ + 'æ' => 'ae', + 'ǽ' => 'ae', + 'À' => 'A', + 'Á' => 'A', + 'Â' => 'A', + 'Ã' => 'A', + 'Å' => 'AA', + 'Ǻ' => 'A', + 'Ă' => 'A', + 'Ǎ' => 'A', + 'Æ' => 'AE', + 'Ǽ' => 'AE', + 'à' => 'a', + 'á' => 'a', + 'â' => 'a', + 'ã' => 'a', + 'å' => 'aa', + 'ǻ' => 'a', + 'ă' => 'a', + 'ǎ' => 'a', + 'ª' => 'a', + 'Ĉ' => 'C', + 'Ċ' => 'C', + 'Ç' => 'C', + 'ç' => 'c', + 'ĉ' => 'c', + 'ċ' => 'c', + 'Ð' => 'Dj', + 'Đ' => 'D', + 'ð' => 'dj', + 'đ' => 'd', + 'È' => 'E', + 'É' => 'E', + 'Ê' => 'E', + 'Ë' => 'E', + 'Ĕ' => 'E', + 'Ė' => 'E', + 'è' => 'e', + 'é' => 'e', + 'ê' => 'e', + 'ë' => 'e', + 'ĕ' => 'e', + 'ė' => 'e', + 'ƒ' => 'f', + 'Ĝ' => 'G', + 'Ġ' => 'G', + 'ĝ' => 'g', + 'ġ' => 'g', + 'Ĥ' => 'H', + 'Ħ' => 'H', + 'ĥ' => 'h', + 'ħ' => 'h', + 'Ì' => 'I', + 'Í' => 'I', + 'Î' => 'I', + 'Ï' => 'I', + 'Ĩ' => 'I', + 'Ĭ' => 'I', + 'Ǐ' => 'I', + 'Į' => 'I', + 'IJ' => 'IJ', + 'ì' => 'i', + 'í' => 'i', + 'î' => 'i', + 'ï' => 'i', + 'ĩ' => 'i', + 'ĭ' => 'i', + 'ǐ' => 'i', + 'į' => 'i', + 'ij' => 'ij', + 'Ĵ' => 'J', + 'ĵ' => 'j', + 'Ĺ' => 'L', + 'Ľ' => 'L', + 'Ŀ' => 'L', + 'ĺ' => 'l', + 'ľ' => 'l', + 'ŀ' => 'l', + 'Ñ' => 'N', + 'ñ' => 'n', + 'ʼn' => 'n', + 'Ò' => 'O', + 'Ó' => 'O', + 'Ô' => 'O', + 'Õ' => 'O', + 'Ō' => 'O', + 'Ŏ' => 'O', + 'Ǒ' => 'O', + 'Ő' => 'O', + 'Ơ' => 'O', + 'Ø' => 'OE', + 'Ǿ' => 'O', + 'Œ' => 'OE', + 'ò' => 'o', + 'ó' => 'o', + 'ô' => 'o', + 'õ' => 'o', + 'ō' => 'o', + 'ŏ' => 'o', + 'ǒ' => 'o', + 'ő' => 'o', + 'ơ' => 'o', + 'ø' => 'oe', + 'ǿ' => 'o', + 'º' => 'o', + 'œ' => 'oe', + 'Ŕ' => 'R', + 'Ŗ' => 'R', + 'ŕ' => 'r', + 'ŗ' => 'r', + 'Ŝ' => 'S', + 'Ș' => 'S', + 'ŝ' => 's', + 'ș' => 's', + 'ſ' => 's', + 'Ţ' => 'T', + 'Ț' => 'T', + 'Ŧ' => 'T', + 'Þ' => 'TH', + 'ţ' => 't', + 'ț' => 't', + 'ŧ' => 't', + 'þ' => 'th', + 'Ù' => 'U', + 'Ú' => 'U', + 'Û' => 'U', + 'Ü' => 'U', + 'Ũ' => 'U', + 'Ŭ' => 'U', + 'Ű' => 'U', + 'Ų' => 'U', + 'Ư' => 'U', + 'Ǔ' => 'U', + 'Ǖ' => 'U', + 'Ǘ' => 'U', + 'Ǚ' => 'U', + 'Ǜ' => 'U', + 'ù' => 'u', + 'ú' => 'u', + 'û' => 'u', + 'ü' => 'u', + 'ũ' => 'u', + 'ŭ' => 'u', + 'ű' => 'u', + 'ų' => 'u', + 'ư' => 'u', + 'ǔ' => 'u', + 'ǖ' => 'u', + 'ǘ' => 'u', + 'ǚ' => 'u', + 'ǜ' => 'u', + 'Ŵ' => 'W', + 'ŵ' => 'w', + 'Ý' => 'Y', + 'Ÿ' => 'Y', + 'Ŷ' => 'Y', + 'ý' => 'y', + 'ÿ' => 'y', + 'ŷ' => 'y', + ], + // Greek(lish) (Elláda) + 'el__greeklish' => [ + 'ΑΥ' => 'AU', + 'ΑΎ' => 'AU', + 'Αυ' => 'Au', + 'Αύ' => 'Au', + 'ΕΊ' => 'EI', + 'ΕΙ' => 'EI', + 'Ει' => 'EI', + 'ΕΥ' => 'EU', + 'ΕΎ' => 'EU', + 'Εί' => 'Ei', + 'Ευ' => 'Eu', + 'Εύ' => 'Eu', + 'ΟΙ' => 'OI', + 'ΟΊ' => 'OI', + 'ΟΥ' => 'OU', + 'ΟΎ' => 'OU', + 'Οι' => 'Oi', + 'Οί' => 'Oi', + 'Ου' => 'Ou', + 'Ού' => 'Ou', + 'ΥΙ' => 'YI', + 'ΎΙ' => 'YI', + 'Υι' => 'Yi', + 'Ύι' => 'Yi', + 'ΥΊ' => 'Yi', + 'Υί' => 'Yi', + 'αυ' => 'au', + 'αύ' => 'au', + 'εί' => 'ei', + 'ει' => 'ei', + 'ευ' => 'eu', + 'εύ' => 'eu', + 'οι' => 'oi', + 'οί' => 'oi', + 'ου' => 'ou', + 'ού' => 'ou', + 'υι' => 'yi', + 'ύι' => 'yi', + 'υί' => 'yi', + 'Α' => 'A', + 'Ά' => 'A', + 'Β' => 'B', + 'Δ' => 'D', + 'Ε' => 'E', + 'Έ' => 'E', + 'Φ' => 'F', + 'Γ' => 'G', + 'Η' => 'H', + 'Ή' => 'H', + 'Ι' => 'I', + 'Ί' => 'I', + 'Ϊ' => 'I', + 'Κ' => 'K', + 'Ξ' => 'Ks', + 'Λ' => 'L', + 'Μ' => 'M', + 'Ν' => 'N', + 'Π' => 'N', + 'Ο' => 'O', + 'Ό' => 'O', + 'Ψ' => 'Ps', + 'Ρ' => 'R', + 'Σ' => 'S', + 'Τ' => 'T', + 'Θ' => 'Th', + 'Ω' => 'W', + 'Ώ' => 'W', + 'Χ' => 'X', + 'ϒ' => 'Y', + 'Υ' => 'Y', + 'Ύ' => 'Y', + 'Ϋ' => 'Y', + 'Ζ' => 'Z', + 'α' => 'a', + 'ά' => 'a', + 'β' => 'b', + 'δ' => 'd', + 'ε' => 'e', + 'έ' => 'e', + 'φ' => 'f', + 'γ' => 'g', + 'η' => 'h', + 'ή' => 'h', + 'ι' => 'i', + 'ί' => 'i', + 'ϊ' => 'i', + 'ΐ' => 'i', + 'κ' => 'k', + 'ξ' => 'ks', + 'λ' => 'l', + 'μ' => 'm', + 'ν' => 'n', + 'ο' => 'o', + 'ό' => 'o', + 'π' => 'p', + 'ψ' => 'ps', + 'ρ' => 'r', + 'σ' => 's', + 'ς' => 's', + 'τ' => 't', + 'ϑ' => 'th', + 'θ' => 'th', + 'ϐ' => 'v', + 'ω' => 'w', + 'ώ' => 'w', + 'χ' => 'x', + 'υ' => 'y', + 'ύ' => 'y', + 'ΰ' => 'y', + 'ϋ' => 'y', + 'ζ' => 'z', + ], + // Greek (Elláda) + 'el' => [ + 'ΑΥ' => 'AU', + 'Αυ' => 'Au', + 'ΟΥ' => 'U', + 'Ου' => 'u', + 'ΕΥ' => 'EF', + 'Ευ' => 'Ef', + 'ΕΙ' => 'I', + 'Ει' => 'I', + 'ΟΙ' => 'I', + 'Οι' => 'I', + 'ΥΙ' => 'I', + 'Υι' => 'I', + 'ΑΎ' => 'AU', + 'Αύ' => 'Au', + 'ΟΎ' => 'OU', + 'Ού' => 'Ou', + 'ΕΎ' => 'EU', + 'Εύ' => 'Eu', + 'ΕΊ' => 'I', + 'Εί' => 'I', + 'ΟΊ' => 'I', + 'Οί' => 'I', + 'ΎΙ' => 'I', + 'Ύι' => 'I', + 'ΥΊ' => 'I', + 'Υί' => 'I', + 'αυ' => 'au', + 'ου' => 'u', + 'ευ' => 'ef', + 'ει' => 'i', + 'οι' => 'i', + 'υι' => 'i', + 'αύ' => 'au', + 'ού' => 'ou', + 'εύ' => 'eu', + 'εί' => 'i', + 'οί' => 'i', + 'ύι' => 'i', + 'υί' => 'i', + 'α' => 'a', + 'β' => 'v', + 'γ' => 'gh', + 'δ' => 'd', + 'ε' => 'e', + 'ζ' => 'z', + 'η' => 'i', + 'θ' => 'th', + 'ι' => 'i', + 'κ' => 'k', + 'λ' => 'l', + 'μ' => 'm', + 'ν' => 'n', + 'ξ' => 'ks', + 'ο' => 'o', + 'π' => 'p', + 'ρ' => 'r', + 'σ' => 's', + 'τ' => 't', + 'υ' => 'i', + 'φ' => 'f', + 'χ' => 'kh', + 'ψ' => 'ps', + 'ω' => 'o', + 'ά' => 'a', + 'έ' => 'e', + 'ί' => 'i', + 'ό' => 'o', + 'ϒ' => 'Y', + 'ύ' => 'y', + 'ή' => 'i', + 'ώ' => 'w', + 'ς' => 's', + 'ϊ' => 'i', + 'ΰ' => 'y', + 'ϋ' => 'y', + 'ΐ' => 'i', + 'Α' => 'A', + 'Β' => 'B', + 'Γ' => 'G', + 'Δ' => 'D', + 'Ε' => 'E', + 'Ζ' => 'Z', + 'Η' => 'H', + 'Θ' => 'Th', + 'Ι' => 'I', + 'Κ' => 'K', + 'Λ' => 'L', + 'Μ' => 'M', + 'Ν' => 'N', + 'Ξ' => 'Ks', + 'Ο' => 'O', + 'Π' => 'P', + 'Ρ' => 'R', + 'Σ' => 'S', + 'Τ' => 'T', + 'Υ' => 'Y', + 'Φ' => 'F', + 'Χ' => 'X', + 'Ψ' => 'Ps', + 'Ω' => 'O', + 'Ά' => 'A', + 'Έ' => 'E', + 'Ί' => 'I', + 'Ό' => 'O', + 'Ύ' => 'Y', + 'Ή' => 'I', + 'Ώ' => 'W', + 'Ϊ' => 'I', + 'Ϋ' => 'Y', + 'ϐ' => 'v', + 'ϑ' => 'th', + ], + // Hindi + 'hi' => [ + 'अ' => 'a', + 'आ' => 'aa', + 'ए' => 'e', + 'ई' => 'ii', + 'ऍ' => 'ei', + 'ऎ' => 'ae', + 'ऐ' => 'ai', + 'इ' => 'i', + 'ओ' => 'o', + 'ऑ' => 'oi', + 'ऒ' => 'oii', + 'ऊ' => 'uu', + 'औ' => 'ou', + 'उ' => 'u', + 'ब' => 'B', + 'भ' => 'Bha', + 'च' => 'Ca', + 'छ' => 'Chha', + 'ड' => 'Da', + 'ढ' => 'Dha', + 'फ' => 'Fa', + 'फ़' => 'Fi', + 'ग' => 'Ga', + 'घ' => 'Gha', + 'ग़' => 'Ghi', + 'ह' => 'Ha', + 'ज' => 'Ja', + 'झ' => 'Jha', + 'क' => 'Ka', + 'ख' => 'Kha', + 'ख़' => 'Khi', + 'ल' => 'L', + 'ळ' => 'Li', + 'ऌ' => 'Li', + 'ऴ' => 'Lii', + 'ॡ' => 'Lii', + 'म' => 'Ma', + 'न' => 'Na', + 'ङ' => 'Na', + 'ञ' => 'Nia', + 'ण' => 'Nae', + 'ऩ' => 'Ni', + 'ॐ' => 'oms', + 'प' => 'Pa', + 'क़' => 'Qi', + 'र' => 'Ra', + 'ऋ' => 'Ri', + 'ॠ' => 'Ri', + 'ऱ' => 'Ri', + 'स' => 'Sa', + 'श' => 'Sha', + 'ष' => 'Shha', + 'ट' => 'Ta', + 'त' => 'Ta', + 'ठ' => 'Tha', + 'द' => 'Tha', + 'थ' => 'Tha', + 'ध' => 'Thha', + 'ड़' => 'ugDha', + 'ढ़' => 'ugDhha', + 'व' => 'Va', + 'य' => 'Ya', + 'य़' => 'Yi', + 'ज़' => 'Za', + ], + // Armenian + 'hy' => [ + 'Ա' => 'A', + 'Բ' => 'B', + 'Գ' => 'G', + 'Դ' => 'D', + 'Ե' => 'E', + 'Զ' => 'Z', + 'Է' => 'E', + 'Ը' => 'Y', + 'Թ' => 'Th', + 'Ժ' => 'Zh', + 'Ի' => 'I', + 'Լ' => 'L', + 'Խ' => 'Kh', + 'Ծ' => 'Ts', + 'Կ' => 'K', + 'Հ' => 'H', + 'Ձ' => 'Dz', + 'Ղ' => 'Gh', + 'Ճ' => 'Tch', + 'Մ' => 'M', + 'Յ' => 'Y', + 'Ն' => 'N', + 'Շ' => 'Sh', + 'Ո' => 'Vo', + 'Չ' => 'Ch', + 'Պ' => 'P', + 'Ջ' => 'J', + 'Ռ' => 'R', + 'Ս' => 'S', + 'Վ' => 'V', + 'Տ' => 'T', + 'Ր' => 'R', + 'Ց' => 'C', + 'Ւ' => 'u', + 'Փ' => 'Ph', + 'Ք' => 'Q', + 'և' => 'ev', + 'Օ' => 'O', + 'Ֆ' => 'F', + 'ա' => 'a', + 'բ' => 'b', + 'գ' => 'g', + 'դ' => 'd', + 'ե' => 'e', + 'զ' => 'z', + 'է' => 'e', + 'ը' => 'y', + 'թ' => 'th', + 'ժ' => 'zh', + 'ի' => 'i', + 'լ' => 'l', + 'խ' => 'kh', + 'ծ' => 'ts', + 'կ' => 'k', + 'հ' => 'h', + 'ձ' => 'dz', + 'ղ' => 'gh', + 'ճ' => 'tch', + 'մ' => 'm', + 'յ' => 'y', + 'ն' => 'n', + 'շ' => 'sh', + 'ո' => 'vo', + 'չ' => 'ch', + 'պ' => 'p', + 'ջ' => 'j', + 'ռ' => 'r', + 'ս' => 's', + 'վ' => 'v', + 'տ' => 't', + 'ր' => 'r', + 'ց' => 'c', + 'ւ' => 'u', + 'փ' => 'ph', + 'ք' => 'q', + 'օ' => 'o', + 'ֆ' => 'f', + ], + // Swedish + 'sv' => [ + 'Ä' => 'A', + 'ä' => 'a', + 'Å' => 'A', + 'å' => 'a', + 'Ö' => 'O', + 'ö' => 'o', + ], + // Turkmen + 'tk' => [ + 'Ç' => 'C', + 'Ä' => 'A', + 'Ž' => 'Z', + 'Ň' => 'N', + 'Ö' => 'O', + 'Ş' => 'S', + 'Ü' => 'U', + 'Ý' => 'Y', + 'ç' => 'c', + 'ä' => 'a', + 'ž' => 'z', + 'ň' => 'n', + 'ö' => 'o', + 'ş' => 's', + 'ü' => 'u', + 'ý' => 'y', + ], + // Turkish + 'tr' => [ + 'ň' => 'n', + 'Ň' => 'N', + 'ş' => 's', + 'Ş' => 'S', + 'ı' => 'i', + 'İ' => 'I', + 'ç' => 'c', + 'Ç' => 'C', + 'ä' => 'a', + 'Ä' => 'A', + 'ü' => 'u', + 'Ü' => 'U', + 'ö' => 'o', + 'Ö' => 'O', + 'ğ' => 'g', + 'Ğ' => 'G', + 'ý' => 'y', + 'Ý' => 'Y', + 'ž' => 'z', + 'Ž' => 'Z', + ], + // Bulgarian + 'bg' => [ + 'ьо' => 'yo', + 'А' => 'A', + 'Б' => 'B', + 'В' => 'V', + 'Г' => 'G', + 'Д' => 'D', + 'Е' => 'E', + 'Ж' => 'Zh', + 'З' => 'Z', + 'И' => 'I', + 'Й' => 'Y', + 'К' => 'K', + 'Л' => 'L', + 'М' => 'M', + 'Н' => 'N', + 'О' => 'O', + 'П' => 'P', + 'Р' => 'R', + 'С' => 'S', + 'Т' => 'T', + 'У' => 'U', + 'Ф' => 'F', + 'Х' => 'H', + 'Ц' => 'C', + 'Ч' => 'Ch', + 'Ш' => 'Sh', + 'Щ' => 'Sht', + 'Ъ' => 'A', + 'Ь' => '', + 'Ю' => 'Yu', + 'Я' => 'Ya', + 'а' => 'a', + 'б' => 'b', + 'в' => 'v', + 'г' => 'g', + 'д' => 'd', + 'е' => 'e', + 'ж' => 'zh', + 'з' => 'z', + 'и' => 'i', + 'й' => 'y', + 'к' => 'k', + 'л' => 'l', + 'м' => 'm', + 'н' => 'n', + 'о' => 'o', + 'п' => 'p', + 'р' => 'r', + 'с' => 's', + 'т' => 't', + 'у' => 'u', + 'ф' => 'f', + 'х' => 'h', + 'ц' => 'c', + 'ч' => 'ch', + 'ш' => 'sh', + 'щ' => 'sht', + 'ъ' => 'a', + 'ь' => '', + 'ю' => 'yu', + 'я' => 'ya', + ], + // Hungarian + 'hu' => [ + 'Á' => 'A', + 'Ē' => 'E', + 'É' => 'E', + 'Í' => 'I', + 'Ó' => 'O', + 'Ö' => 'O', + 'Ő' => 'O', + 'Ú' => 'U', + 'Ü' => 'U', + 'Ű' => 'U', + 'á' => 'a', + 'ē' => 'e', + 'é' => 'e', + 'í' => 'i', + 'ó' => 'o', + 'ö' => 'o', + 'ő' => 'o', + 'ú' => 'u', + 'ü' => 'u', + 'ű' => 'u', + ], + // Myanmar (Burmese) + 'my' => [ + 'န်ုပ်' => 'nub', + 'ောင်' => 'aung', + 'ိုက်' => 'aik', + 'ိုဒ်' => 'ok', + 'ိုင်' => 'aing', + 'ိုလ်' => 'ol', + 'ေါင်' => 'aung', + 'သြော' => 'aw', + 'ောက်' => 'auk', + 'ိတ်' => 'eik', + 'ုတ်' => 'ok', + 'ုန်' => 'on', + 'ေတ်' => 'it', + 'ုဒ်' => 'ait', + 'ာန်' => 'an', + 'ိန်' => 'ein', + 'ွတ်' => 'ut', + 'ေါ်' => 'aw', + 'ွန်' => 'un', + 'ိပ်' => 'eik', + 'ုပ်' => 'ok', + 'ွပ်' => 'ut', + 'ိမ်' => 'ein', + 'ုမ်' => 'on', + 'ော်' => 'aw', + 'ွမ်' => 'un', + 'က်' => 'et', + 'ေါ' => 'aw', + 'ော' => 'aw', + 'ျွ' => 'ywa', + 'ြွ' => 'yw', + 'ို' => 'o', + 'ုံ' => 'on', + 'တ်' => 'at', + 'င်' => 'in', + 'ည်' => 'i', + 'ဒ်' => 'd', + 'န်' => 'an', + 'ပ်' => 'at', + 'မ်' => 'an', + 'စျ' => 'za', + 'ယ်' => 'e', + 'ဉ်' => 'in', + 'စ်' => 'it', + 'ိံ' => 'ein', + 'ဲ' => 'e', + 'း' => '', + 'ာ' => 'a', + 'ါ' => 'a', + 'ေ' => 'e', + 'ံ' => 'an', + 'ိ' => 'i', + 'ီ' => 'i', + 'ု' => 'u', + 'ူ' => 'u', + '်' => 'at', + '္' => '', + '့' => '', + 'က' => 'k', + '၉' => '9', + 'တ' => 't', + 'ရ' => 'ya', + 'ယ' => 'y', + 'မ' => 'm', + 'ဘ' => 'ba', + 'ဗ' => 'b', + 'ဖ' => 'pa', + 'ပ' => 'p', + 'န' => 'n', + 'ဓ' => 'da', + 'ဒ' => 'd', + 'ထ' => 'ta', + 'ဏ' => 'na', + 'ဝ' => 'w', + 'ဎ' => 'da', + 'ဍ' => 'd', + 'ဌ' => 'ta', + 'ဋ' => 't', + 'ည' => 'ny', + 'ဇ' => 'z', + 'ဆ' => 'sa', + 'စ' => 's', + 'င' => 'ng', + 'ဃ' => 'ga', + 'ဂ' => 'g', + 'လ' => 'l', + 'သ' => 'th', + '၈' => '8', + 'ဩ' => 'aw', + 'ခ' => 'kh', + '၆' => '6', + '၅' => '5', + '၄' => '4', + '၃' => '3', + '၂' => '2', + '၁' => '1', + '၀' => '0', + '၌' => 'hnaik', + '၍' => 'ywae', + 'ဪ' => 'aw', + 'ဦ' => '-u', + 'ဟ' => 'h', + 'ဉ' => 'u', + 'ဤ' => '-i', + 'ဣ' => 'i', + '၏' => '-e', + 'ဧ' => 'e', + 'ှ' => 'h', + 'ွ' => 'w', + 'ျ' => 'ya', + 'ြ' => 'y', + 'အ' => 'a', + 'ဠ' => 'la', + '၇' => '7', + ], + // Croatian (Hrvatska) + 'hr' => [ + 'DŽ' => 'DZ', + 'Dž' => 'Dz', + 'dž' => 'dz', + 'DZ' => 'DZ', + 'Dz' => 'Dz', + 'dz' => 'dz', + 'IJ' => 'IJ', + 'ij' => 'ij', + 'LJ' => 'LJ', + 'Lj' => 'Lj', + 'lj' => 'lj', + 'NJ' => 'NJ', + 'Nj' => 'Nj', + 'nj' => 'nj', + 'ž' => 'z', + 'Ž' => 'Z', + 'đ' => 'dj', + 'Đ' => 'Dj', + 'č' => 'c', + 'Č' => 'C', + 'ć' => 'c', + 'Ć' => 'C', + 'š' => 's', + 'Š' => 'S', + ], + // Finnish + 'fi' => [ + 'Ä' => 'A', + 'Ö' => 'O', + 'ä' => 'a', + 'ö' => 'o', + ], + // Georgian (Kartvelian) + 'ka' => [ + 'ა' => 'a', + 'ბ' => 'b', + 'გ' => 'g', + 'დ' => 'd', + 'ე' => 'e', + 'ვ' => 'v', + 'ზ' => 'z', + 'თ' => 't', + 'ი' => 'i', + 'კ' => 'k', + 'ლ' => 'l', + 'მ' => 'm', + 'ნ' => 'n', + 'ო' => 'o', + 'პ' => 'p', + 'ჟ' => 'zh', + 'რ' => 'r', + 'ს' => 's', + 'ტ' => 't', + 'უ' => 'u', + 'ფ' => 'f', + 'ქ' => 'q', + 'ღ' => 'gh', + 'ყ' => 'y', + 'შ' => 'sh', + 'ჩ' => 'ch', + 'ც' => 'ts', + 'ძ' => 'dz', + 'წ' => 'ts', + 'ჭ' => 'ch', + 'ხ' => 'kh', + 'ჯ' => 'j', + 'ჰ' => 'h', + ], + // Russian + 'ru' => [ + 'А' => 'A', + 'а' => 'a', + 'Б' => 'B', + 'б' => 'b', + 'В' => 'V', + 'в' => 'v', + 'Г' => 'G', + 'г' => 'g', + 'Д' => 'D', + 'д' => 'd', + 'Е' => 'E', + 'е' => 'e', + 'Ё' => 'Yo', + 'ё' => 'yo', + 'Ж' => 'Zh', + 'ж' => 'zh', + 'З' => 'Z', + 'з' => 'z', + 'И' => 'I', + 'и' => 'i', + 'Й' => 'Y', + 'й' => 'y', + 'К' => 'K', + 'к' => 'k', + 'Л' => 'L', + 'л' => 'l', + 'М' => 'M', + 'м' => 'm', + 'Н' => 'N', + 'н' => 'n', + 'О' => 'O', + 'о' => 'o', + 'П' => 'P', + 'п' => 'p', + 'Р' => 'R', + 'р' => 'r', + 'С' => 'S', + 'с' => 's', + 'Т' => 'T', + 'т' => 't', + 'У' => 'U', + 'у' => 'u', + 'Ф' => 'F', + 'ф' => 'f', + 'Х' => 'H', + 'х' => 'h', + 'Ц' => 'Ts', + 'ц' => 'ts', + 'Ч' => 'Ch', + 'ч' => 'ch', + 'ш' => 'sh', + 'Ш' => 'Sh', + 'Щ' => 'Sch', + 'щ' => 'sch', + 'Ъ' => '', + 'ъ' => '', + 'Ы' => 'Y', + 'ы' => 'y', + 'Ь' => '', + 'ь' => '', + 'Э' => 'E', + 'э' => 'e', + 'Ю' => 'Yu', + 'ю' => 'yu', + 'Я' => 'Ya', + 'я' => 'ya', + ], + // Russian - Passport (2013), ICAO + // -> https://en.m.wikipedia.org/wiki/Romanization_of_Russian#content-collapsible-block-1 + 'ru__passport_2013' => [ + 'А' => 'A', + 'а' => 'a', + 'Б' => 'B', + 'б' => 'b', + 'В' => 'V', + 'в' => 'v', + 'Г' => 'G', + 'г' => 'g', + 'Д' => 'D', + 'д' => 'd', + 'Е' => 'E', + 'е' => 'e', + 'Ё' => 'E', + 'ё' => 'e', + 'Ж' => 'Zh', + 'ж' => 'zh', + 'З' => 'Z', + 'з' => 'z', + 'И' => 'i', + 'и' => 'i', + 'Й' => 'i', + 'й' => 'i', + 'К' => 'K', + 'к' => 'k', + 'Л' => 'L', + 'л' => 'l', + 'М' => 'M', + 'м' => 'm', + 'Н' => 'N', + 'н' => 'n', + 'О' => 'O', + 'о' => 'o', + 'П' => 'P', + 'п' => 'p', + 'Р' => 'R', + 'р' => 'r', + 'С' => 'S', + 'с' => 's', + 'Т' => 'T', + 'т' => 't', + 'У' => 'U', + 'у' => 'u', + 'Ф' => 'F', + 'ф' => 'f', + 'Х' => 'Kh', + 'х' => 'kh', + 'Ц' => 'Ts', + 'ц' => 'ts', + 'Ч' => 'Ch', + 'ч' => 'ch', + 'ш' => 'sh', + 'Ш' => 'Sh', + 'Щ' => 'Shch', + 'щ' => 'shch', + 'Ъ' => 'Ie', + 'ъ' => 'ie', + 'Ы' => 'Y', + 'ы' => 'y', + 'Ь' => '', + 'ь' => '', + 'Э' => 'E', + 'э' => 'e', + 'Ю' => 'Iu', + 'ю' => 'iu', + 'Я' => 'Ia', + 'я' => 'ia', + 'І' => '', + 'і' => '', + 'Ѳ' => '', + 'ѳ' => '', + 'Ѣ' => '', + 'ѣ' => '', + 'Ѵ' => '', + 'ѵ' => '', + 'Є' => '', + 'є' => '', + 'Ѥ' => '', + 'ѥ' => '', + 'Ѕ' => '', + 'ѕ' => '', + 'Ꙋ' => '', + 'ꙋ' => '', + 'Ѡ' => '', + 'ѡ' => '', + 'Ѿ' => '', + 'ѿ' => '', + 'Ѫ' => '', + 'ѫ' => '', + 'Ѧ' => '', + 'ѧ' => '', + 'Ѭ' => '', + 'ѭ' => '', + 'Ѩ' => '', + 'ѩ' => '', + 'Ѯ' => '', + 'ѯ' => '', + 'Ѱ' => '', + 'ѱ' => '', + ], + // Russian - GOST 7.79-2000(B) + // -> https://en.m.wikipedia.org/wiki/Romanization_of_Russian#content-collapsible-block-1 + 'ru__gost_2000_b' => [ + 'А' => 'A', + 'а' => 'a', + 'Б' => 'B', + 'б' => 'b', + 'В' => 'V', + 'в' => 'v', + 'Г' => 'G', + 'г' => 'g', + 'Д' => 'D', + 'д' => 'd', + 'Е' => 'E', + 'е' => 'e', + 'Ё' => 'Yo', + 'ё' => 'yo', + 'Ж' => 'Zh', + 'ж' => 'zh', + 'З' => 'Z', + 'з' => 'z', + 'И' => 'i', + 'и' => 'i', + 'Й' => 'i', + 'й' => 'i', + 'К' => 'K', + 'к' => 'k', + 'Л' => 'L', + 'л' => 'l', + 'М' => 'M', + 'м' => 'm', + 'Н' => 'N', + 'н' => 'n', + 'О' => 'O', + 'о' => 'o', + 'П' => 'P', + 'п' => 'p', + 'Р' => 'R', + 'р' => 'r', + 'С' => 'S', + 'с' => 's', + 'Т' => 'T', + 'т' => 't', + 'У' => 'U', + 'у' => 'u', + 'Ф' => 'F', + 'ф' => 'f', + 'Х' => 'X', + 'х' => 'x', + 'Ц' => 'Cz', + 'ц' => 'cz', + 'Ч' => 'Ch', + 'ч' => 'ch', + 'ш' => 'sh', + 'Ш' => 'Sh', + 'Щ' => 'Shh', + 'щ' => 'shh', + 'Ъ' => '', + 'ъ' => '', + 'Ы' => 'Y\'', + 'ы' => 'y\'', + 'Ь' => '', + 'ь' => '', + 'Э' => 'E\'', + 'э' => 'e\'', + 'Ю' => 'Yu', + 'ю' => 'yu', + 'Я' => 'Ya', + 'я' => 'ya', + 'І' => 'I', + 'і' => 'i', + 'Ѳ' => 'Fh', + 'ѳ' => 'fh', + 'Ѣ' => 'Ye', + 'ѣ' => 'ye', + 'Ѵ' => 'Yh', + 'ѵ' => 'yh', + 'Є' => '', + 'є' => '', + 'Ѥ' => '', + 'ѥ' => '', + 'Ѕ' => 'Js', + 'ѕ' => 'js', + 'Ꙋ' => '', + 'ꙋ' => '', + 'Ѡ' => '', + 'ѡ' => '', + 'Ѿ' => '', + 'ѿ' => '', + 'Ѫ' => '', + 'ѫ' => '', + 'Ѧ' => '', + 'ѧ' => '', + 'Ѭ' => '', + 'ѭ' => '', + 'Ѩ' => '', + 'ѩ' => '', + 'Ѯ' => '', + 'ѯ' => '', + 'Ѱ' => '', + 'ѱ' => '', + ], + // Ukrainian + 'uk' => [ + 'Є' => 'Ye', + 'є' => 'ye', + 'І' => 'I', + 'і' => 'i', + 'Ї' => 'Yi', + 'ї' => 'yi', + 'Ґ' => 'G', + 'ґ' => 'g', + ], + // Kazakh + 'kk' => [ + 'Ә' => 'A', + 'Ғ' => 'G', + 'Қ' => 'Q', + 'Ң' => 'N', + 'Ө' => 'O', + 'Ұ' => 'U', + 'Ү' => 'U', + 'Һ' => 'H', + 'ә' => 'a', + 'ғ' => 'g', + 'қ' => 'q', + 'ң' => 'n', + 'ө' => 'o', + 'ұ' => 'u', + 'ү' => 'u', + 'һ' => 'h', + ], + // Czech + 'cs' => [ + 'á' => 'a', + 'Á' => 'A', + 'č' => 'c', + 'Č' => 'C', + 'ď' => 'd', + 'Ď' => 'D', + 'é' => 'e', + 'É' => 'E', + 'ě' => 'e', + 'Ě' => 'E', + 'í' => 'i', + 'Í' => 'I', + 'ň' => 'n', + 'Ň' => 'N', + 'ó' => 'o', + 'Ó' => 'O', + 'ř' => 'r', + 'Ř' => 'R', + 'š' => 's', + 'Š' => 'S', + 'ť' => 't', + 'Ť' => 'T', + 'ú' => 'u', + 'Ú' => 'U', + 'ů' => 'u', + 'Ů' => 'U', + 'ý' => 'y', + 'Ý' => 'Y', + 'ž' => 'z', + 'Ž' => 'Z', + ], + // Danish + 'da' => [ + 'Æ' => 'Ae', + 'æ' => 'ae', + 'Ø' => 'Oe', + 'ø' => 'oe', + 'Å' => 'Aa', + 'å' => 'aa', + 'É' => 'E', + 'é' => 'e', + ], + // Polish + 'pl' => [ + 'ą' => 'a', + 'ć' => 'c', + 'ę' => 'e', + 'ł' => 'l', + 'ń' => 'n', + 'ó' => 'o', + 'ś' => 's', + 'ź' => 'z', + 'ż' => 'z', + 'Ą' => 'A', + 'Ć' => 'C', + 'Ę' => 'E', + 'Ł' => 'L', + 'Ń' => 'N', + 'Ó' => 'O', + 'Ś' => 'S', + 'Ź' => 'Z', + 'Ż' => 'Z', + ], + // Romanian + 'ro' => [ + 'ă' => 'a', + 'â' => 'a', + 'Ă' => 'A', + 'Â' => 'A', + 'î' => 'i', + 'Î' => 'I', + 'ș' => 's', + 'ş' => 's', + 'Ş' => 'S', + 'Ș' => 'S', + 'ț' => 't', + 'ţ' => 't', + 'Ţ' => 'T', + 'Ț' => 'T', + ], + // Esperanto + 'eo' => [ + 'ĉ' => 'cx', + 'ĝ' => 'gx', + 'ĥ' => 'hx', + 'ĵ' => 'jx', + 'ŝ' => 'sx', + 'ŭ' => 'ux', + 'Ĉ' => 'CX', + 'Ĝ' => 'GX', + 'Ĥ' => 'HX', + 'Ĵ' => 'JX', + 'Ŝ' => 'SX', + 'Ŭ' => 'UX', + ], + // Estonian + 'et' => [ + 'Š' => 'S', + 'Ž' => 'Z', + 'Õ' => 'O', + 'Ä' => 'A', + 'Ö' => 'O', + 'Ü' => 'U', + 'š' => 's', + 'ž' => 'z', + 'õ' => 'o', + 'ä' => 'a', + 'ö' => 'o', + 'ü' => 'u', + ], + // Latvian + 'lv' => [ + 'ā' => 'a', + 'č' => 'c', + 'ē' => 'e', + 'ģ' => 'g', + 'ī' => 'i', + 'ķ' => 'k', + 'ļ' => 'l', + 'ņ' => 'n', + 'š' => 's', + 'ū' => 'u', + 'ž' => 'z', + 'Ā' => 'A', + 'Č' => 'C', + 'Ē' => 'E', + 'Ģ' => 'G', + 'Ī' => 'i', + 'Ķ' => 'k', + 'Ļ' => 'L', + 'Ņ' => 'N', + 'Š' => 'S', + 'Ū' => 'u', + 'Ž' => 'Z', + ], + // Lithuanian + 'lt' => [ + 'ą' => 'a', + 'č' => 'c', + 'ę' => 'e', + 'ė' => 'e', + 'į' => 'i', + 'š' => 's', + 'ų' => 'u', + 'ū' => 'u', + 'ž' => 'z', + 'Ą' => 'A', + 'Č' => 'C', + 'Ę' => 'E', + 'Ė' => 'E', + 'Į' => 'I', + 'Š' => 'S', + 'Ų' => 'U', + 'Ū' => 'U', + 'Ž' => 'Z', + ], + // Norwegian + 'no' => [ + 'Æ' => 'AE', + 'æ' => 'ae', + 'Ø' => 'OE', + 'ø' => 'oe', + 'Å' => 'AA', + 'å' => 'aa', + ], + // Vietnamese + 'vi' => [ + 'Á' => 'A', + 'À' => 'A', + 'Ả' => 'A', + 'Ã' => 'A', + 'Ạ' => 'A', + 'Ă' => 'A', + 'Ắ' => 'A', + 'Ằ' => 'A', + 'Ẳ' => 'A', + 'Ẵ' => 'A', + 'Ặ' => 'A', + 'Â' => 'A', + 'Ấ' => 'A', + 'Ầ' => 'A', + 'Ẩ' => 'A', + 'Ẫ' => 'A', + 'Ậ' => 'A', + 'á' => 'a', + 'à' => 'a', + 'ả' => 'a', + 'ã' => 'a', + 'ạ' => 'a', + 'ă' => 'a', + 'ắ' => 'a', + 'ằ' => 'a', + 'ẳ' => 'a', + 'ẵ' => 'a', + 'ặ' => 'a', + 'â' => 'a', + 'ấ' => 'a', + 'ầ' => 'a', + 'ẩ' => 'a', + 'ẫ' => 'a', + 'ậ' => 'a', + 'É' => 'E', + 'È' => 'E', + 'Ẻ' => 'E', + 'Ẽ' => 'E', + 'Ẹ' => 'E', + 'Ê' => 'E', + 'Ế' => 'E', + 'Ề' => 'E', + 'Ể' => 'E', + 'Ễ' => 'E', + 'Ệ' => 'E', + 'é' => 'e', + 'è' => 'e', + 'ẻ' => 'e', + 'ẽ' => 'e', + 'ẹ' => 'e', + 'ê' => 'e', + 'ế' => 'e', + 'ề' => 'e', + 'ể' => 'e', + 'ễ' => 'e', + 'ệ' => 'e', + 'Í' => 'I', + 'Ì' => 'I', + 'Ỉ' => 'I', + 'Ĩ' => 'I', + 'Ị' => 'I', + 'í' => 'i', + 'ì' => 'i', + 'ỉ' => 'i', + 'ĩ' => 'i', + 'ị' => 'i', + 'Ó' => 'O', + 'Ò' => 'O', + 'Ỏ' => 'O', + 'Õ' => 'O', + 'Ọ' => 'O', + 'Ô' => 'O', + 'Ố' => 'O', + 'Ồ' => 'O', + 'Ổ' => 'O', + 'Ỗ' => 'O', + 'Ộ' => 'O', + 'Ơ' => 'O', + 'Ớ' => 'O', + 'Ờ' => 'O', + 'Ở' => 'O', + 'Ỡ' => 'O', + 'Ợ' => 'O', + 'ó' => 'o', + 'ò' => 'o', + 'ỏ' => 'o', + 'õ' => 'o', + 'ọ' => 'o', + 'ô' => 'o', + 'ố' => 'o', + 'ồ' => 'o', + 'ổ' => 'o', + 'ỗ' => 'o', + 'ộ' => 'o', + 'ơ' => 'o', + 'ớ' => 'o', + 'ờ' => 'o', + 'ở' => 'o', + 'ỡ' => 'o', + 'ợ' => 'o', + 'Ú' => 'U', + 'Ù' => 'U', + 'Ủ' => 'U', + 'Ũ' => 'U', + 'Ụ' => 'U', + 'Ư' => 'U', + 'Ứ' => 'U', + 'Ừ' => 'U', + 'Ử' => 'U', + 'Ữ' => 'U', + 'Ự' => 'U', + 'ú' => 'u', + 'ù' => 'u', + 'ủ' => 'u', + 'ũ' => 'u', + 'ụ' => 'u', + 'ư' => 'u', + 'ứ' => 'u', + 'ừ' => 'u', + 'ử' => 'u', + 'ữ' => 'u', + 'ự' => 'u', + 'Ý' => 'Y', + 'Ỳ' => 'Y', + 'Ỷ' => 'Y', + 'Ỹ' => 'Y', + 'Ỵ' => 'Y', + 'ý' => 'y', + 'ỳ' => 'y', + 'ỷ' => 'y', + 'ỹ' => 'y', + 'ỵ' => 'y', + 'Đ' => 'D', + 'đ' => 'd', + ], + // Persian (Farsi) + 'fa' => [ + 'ا' => 'a', + 'ب' => 'b', + 'پ' => 'b', + 'ت' => 't', + 'ث' => 's', + 'ج' => 'g', + 'چ' => 'ch', + 'ح' => 'h', + 'خ' => 'kh', + 'د' => 'd', + 'ذ' => 'z', + 'ر' => 'r', + 'ز' => 'z', + 'س' => 's', + 'ش' => 'sh', + 'ص' => 's', + 'ض' => 'z', + 'ط' => 't', + 'ظ' => 'z', + 'ع' => 'a', + 'غ' => 'gh', + 'ف' => 'f', + 'ق' => 'gh', + 'ک' => 'k', + 'گ' => 'g', + 'ل' => 'l', + 'ژ' => 'zh', + 'ك' => 'k', + 'م' => 'm', + 'ن' => 'n', + 'ه' => 'h', + 'و' => 'o', + 'ی' => 'y', + 'آ' => 'a', + '٠' => '0', + '١' => '1', + '٢' => '2', + '٣' => '3', + '٤' => '4', + '٥' => '5', + '٦' => '6', + '٧' => '7', + '٨' => '8', + '٩' => '9', + ], + // Arabic + 'ar' => [ + 'أ' => 'a', + 'ب' => 'b', + 'ت' => 't', + 'ث' => 'th', + 'ج' => 'g', + 'ح' => 'h', + 'خ' => 'kh', + 'د' => 'd', + 'ذ' => 'th', + 'ر' => 'r', + 'ز' => 'z', + 'س' => 's', + 'ش' => 'sh', + 'ص' => 's', + 'ض' => 'd', + 'ط' => 't', + 'ظ' => 'th', + 'ع' => 'aa', + 'غ' => 'gh', + 'ف' => 'f', + 'ق' => 'k', + 'ك' => 'k', + 'ل' => 'l', + 'م' => 'm', + 'ن' => 'n', + 'ه' => 'h', + 'و' => 'o', + 'ي' => 'y', + 'ا' => 'a', + 'إ' => 'a', + 'آ' => 'a', + 'ؤ' => 'o', + 'ئ' => 'y', + 'ء' => 'aa', + '٠' => '0', + '١' => '1', + '٢' => '2', + '٣' => '3', + '٤' => '4', + '٥' => '5', + '٦' => '6', + '٧' => '7', + '٨' => '8', + '٩' => '9', + ], + // Serbian + 'sr' => [ + 'đ' => 'dj', + 'ž' => 'z', + 'ć' => 'c', + 'č' => 'c', + 'š' => 's', + 'Đ' => 'Dj', + 'Ž' => 'Z', + 'Ć' => 'C', + 'Č' => 'C', + 'Š' => 'S', + 'а' => 'a', + 'б' => 'b', + 'в' => 'v', + 'г' => 'g', + 'д' => 'd', + 'ђ' => 'dj', + 'е' => 'e', + 'ж' => 'z', + 'з' => 'z', + 'и' => 'i', + 'ј' => 'j', + 'к' => 'k', + 'л' => 'l', + 'љ' => 'lj', + 'м' => 'm', + 'н' => 'n', + 'њ' => 'nj', + 'о' => 'o', + 'п' => 'p', + 'р' => 'r', + 'с' => 's', + 'т' => 't', + 'ћ' => 'c', + 'у' => 'u', + 'ф' => 'f', + 'х' => 'h', + 'ц' => 'c', + 'ч' => 'c', + 'џ' => 'dz', + 'ш' => 's', + 'А' => 'A', + 'Б' => 'B', + 'В' => 'V', + 'Г' => 'G', + 'Д' => 'D', + 'Ђ' => 'Dj', + 'Е' => 'E', + 'Ж' => 'Z', + 'З' => 'Z', + 'И' => 'I', + 'Ј' => 'j', + 'К' => 'K', + 'Л' => 'L', + 'Љ' => 'Lj', + 'М' => 'M', + 'Н' => 'N', + 'Њ' => 'Nj', + 'О' => 'O', + 'П' => 'P', + 'Р' => 'R', + 'С' => 'S', + 'Т' => 'T', + 'Ћ' => 'C', + 'У' => 'U', + 'Ф' => 'F', + 'Х' => 'H', + 'Ц' => 'C', + 'Ч' => 'C', + 'Џ' => 'Dz', + 'Ш' => 'S', + ], + // Serbian - Cyrillic + 'sr__cyr' => [ + 'а' => 'a', + 'б' => 'b', + 'в' => 'v', + 'г' => 'g', + 'д' => 'd', + 'ђ' => 'dj', + 'е' => 'e', + 'ж' => 'z', + 'з' => 'z', + 'и' => 'i', + 'ј' => 'j', + 'к' => 'k', + 'л' => 'l', + 'љ' => 'lj', + 'м' => 'm', + 'н' => 'n', + 'њ' => 'nj', + 'о' => 'o', + 'п' => 'p', + 'р' => 'r', + 'с' => 's', + 'т' => 't', + 'ћ' => 'c', + 'у' => 'u', + 'ф' => 'f', + 'х' => 'h', + 'ц' => 'c', + 'ч' => 'c', + 'џ' => 'dz', + 'ш' => 's', + 'А' => 'A', + 'Б' => 'B', + 'В' => 'V', + 'Г' => 'G', + 'Д' => 'D', + 'Ђ' => 'Dj', + 'Е' => 'E', + 'Ж' => 'Z', + 'З' => 'Z', + 'И' => 'I', + 'Ј' => 'j', + 'К' => 'K', + 'Л' => 'L', + 'Љ' => 'Lj', + 'М' => 'M', + 'Н' => 'N', + 'Њ' => 'Nj', + 'О' => 'O', + 'П' => 'P', + 'Р' => 'R', + 'С' => 'S', + 'Т' => 'T', + 'Ћ' => 'C', + 'У' => 'U', + 'Ф' => 'F', + 'Х' => 'H', + 'Ц' => 'C', + 'Ч' => 'C', + 'Џ' => 'Dz', + 'Ш' => 'S', + ], + // Serbian - Latin + 'sr__lat' => [ + 'đ' => 'dj', + 'ž' => 'z', + 'ć' => 'c', + 'č' => 'c', + 'š' => 's', + 'Đ' => 'Dj', + 'Ž' => 'Z', + 'Ć' => 'C', + 'Č' => 'C', + 'Š' => 'S', + ], + // Azerbaijani + 'az' => [ + 'ç' => 'c', + 'ə' => 'e', + 'ğ' => 'g', + 'ı' => 'i', + 'ö' => 'o', + 'ş' => 's', + 'ü' => 'u', + 'Ç' => 'C', + 'Ə' => 'E', + 'Ğ' => 'G', + 'İ' => 'I', + 'Ö' => 'O', + 'Ş' => 'S', + 'Ü' => 'U', + ], + // Slovak + 'sk' => [ + 'á' => 'a', + 'ä' => 'a', + 'č' => 'c', + 'ď' => 'd', + 'é' => 'e', + 'í' => 'i', + 'ľ' => 'l', + 'ĺ' => 'l', + 'ň' => 'n', + 'ó' => 'o', + 'ô' => 'o', + 'ŕ' => 'r', + 'š' => 's', + 'ť' => 't', + 'ú' => 'u', + 'ý' => 'y', + 'ž' => 'z', + 'Á' => 'A', + 'Ä' => 'A', + 'Č' => 'C', + 'Ď' => 'D', + 'É' => 'E', + 'Í' => 'I', + 'Ľ' => 'L', + 'Ĺ' => 'L', + 'Ň' => 'N', + 'Ó' => 'O', + 'Ô' => 'O', + 'Ŕ' => 'R', + 'Š' => 'S', + 'Ť' => 'T', + 'Ú' => 'U', + 'Ý' => 'Y', + 'Ž' => 'Z', + ], + // French + 'fr' => [ + 'Æ' => 'AE', + 'æ' => 'ae', + 'Œ' => 'OE', + 'œ' => 'oe', + 'â' => 'a', + 'Â' => 'A', + 'à' => 'a', + 'À' => 'A', + 'ä' => 'a', + 'Ä' => 'A', + 'ç' => 'c', + 'Ç' => 'C', + 'é' => 'e', + 'É' => 'E', + 'ê' => 'e', + 'Ê' => 'E', + 'ë' => 'e', + 'Ë' => 'E', + 'è' => 'e', + 'È' => 'E', + 'ï' => 'i', + 'î' => 'i', + 'Ï' => 'I', + 'Î' => 'I', + 'ÿ' => 'y', + 'Ÿ' => 'Y', + 'ô' => 'o', + 'Ô' => 'O', + 'ö' => 'o', + 'Ö' => 'O', + 'û' => 'u', + 'Û' => 'U', + 'ù' => 'u', + 'Ù' => 'U', + 'ü' => 'u', + 'Ü' => 'U', + ], + // Austrian (French) + 'fr_at' => [ + 'ß' => 'sz', + 'ẞ' => 'SZ', + 'Æ' => 'AE', + 'æ' => 'ae', + 'Œ' => 'OE', + 'œ' => 'oe', + 'â' => 'a', + 'Â' => 'A', + 'à' => 'a', + 'À' => 'A', + 'ä' => 'a', + 'Ä' => 'A', + 'ç' => 'c', + 'Ç' => 'C', + 'é' => 'e', + 'É' => 'E', + 'ê' => 'e', + 'Ê' => 'E', + 'ë' => 'e', + 'Ë' => 'E', + 'è' => 'e', + 'È' => 'E', + 'ï' => 'i', + 'î' => 'i', + 'Ï' => 'I', + 'Î' => 'I', + 'ÿ' => 'y', + 'Ÿ' => 'Y', + 'ô' => 'o', + 'Ô' => 'O', + 'ö' => 'o', + 'Ö' => 'O', + 'û' => 'u', + 'Û' => 'U', + 'ù' => 'u', + 'Ù' => 'U', + 'ü' => 'u', + 'Ü' => 'U', + ], + // Switzerland (French) + 'fr_ch' => [ + 'ß' => 'ss', + 'ẞ' => 'SS', + 'Æ' => 'AE', + 'æ' => 'ae', + 'Œ' => 'OE', + 'œ' => 'oe', + 'â' => 'a', + 'Â' => 'A', + 'à' => 'a', + 'À' => 'A', + 'ä' => 'a', + 'Ä' => 'A', + 'ç' => 'c', + 'Ç' => 'C', + 'é' => 'e', + 'É' => 'E', + 'ê' => 'e', + 'Ê' => 'E', + 'ë' => 'e', + 'Ë' => 'E', + 'è' => 'e', + 'È' => 'E', + 'ï' => 'i', + 'î' => 'i', + 'Ï' => 'I', + 'Î' => 'I', + 'ÿ' => 'y', + 'Ÿ' => 'Y', + 'ô' => 'o', + 'Ô' => 'O', + 'ö' => 'o', + 'Ö' => 'O', + 'û' => 'u', + 'Û' => 'U', + 'ù' => 'u', + 'Ù' => 'U', + 'ü' => 'u', + 'Ü' => 'U', + ], + // German + 'de' => [ + 'Ä' => 'Ae', + 'Ö' => 'Oe', + 'Ü' => 'Ue', + 'ä' => 'ae', + 'ö' => 'oe', + 'ü' => 'ue', + 'ß' => 'ss', + 'ẞ' => 'SS', + ], + // Austrian (German) + 'de_at' => [ + 'Ä' => 'Ae', + 'Ö' => 'Oe', + 'Ü' => 'Ue', + 'ä' => 'ae', + 'ö' => 'oe', + 'ü' => 'ue', + 'ß' => 'sz', + 'ẞ' => 'SZ', + ], + // Switzerland (German) + 'de_ch' => [ + 'Ä' => 'Ae', + 'Ö' => 'Oe', + 'Ü' => 'Ue', + 'ä' => 'ae', + 'ö' => 'oe', + 'ü' => 'ue', + 'ß' => 'ss', + 'ẞ' => 'SS', + ], + // Bengali (Bangla) + 'bn' => [ + 'ভ্ল' => 'vl', + 'পশ' => 'psh', + 'ব্ধ' => 'bdh', + 'ব্জ' => 'bj', + 'ব্দ' => 'bd', + 'ব্ব' => 'bb', + 'ব্ল' => 'bl', + 'ভ' => 'v', + 'ব' => 'b', + 'চ্ঞ' => 'cNG', + 'চ্ছ' => 'cch', + 'চ্চ' => 'cc', + 'ছ' => 'ch', + 'চ' => 'c', + 'ধ্ন' => 'dhn', + 'ধ্ম' => 'dhm', + 'দ্ঘ' => 'dgh', + 'দ্ধ' => 'ddh', + 'দ্ভ' => 'dv', + 'দ্ম' => 'dm', + 'ড্ড' => 'DD', + 'ঢ' => 'Dh', + 'ধ' => 'dh', + 'দ্গ' => 'dg', + 'দ্দ' => 'dd', + 'ড' => 'D', + 'দ' => 'd', + '।' => '.', + 'ঘ্ন' => 'Ghn', + 'গ্ধ' => 'Gdh', + 'গ্ণ' => 'GN', + 'গ্ন' => 'Gn', + 'গ্ম' => 'Gm', + 'গ্ল' => 'Gl', + 'জ্ঞ' => 'jNG', + 'ঘ' => 'Gh', + 'গ' => 'g', + 'হ্ণ' => 'hN', + 'হ্ন' => 'hn', + 'হ্ম' => 'hm', + 'হ্ল' => 'hl', + 'হ' => 'h', + 'জ্ঝ' => 'jjh', + 'ঝ' => 'jh', + 'জ্জ' => 'jj', + 'জ' => 'j', + 'ক্ষ্ণ' => 'kxN', + 'ক্ষ্ম' => 'kxm', + 'ক্ষ' => 'ksh', + 'কশ' => 'ksh', + 'ক্ক' => 'kk', + 'ক্ট' => 'kT', + 'ক্ত' => 'kt', + 'ক্ল' => 'kl', + 'ক্স' => 'ks', + 'খ' => 'kh', + 'ক' => 'k', + 'ল্ভ' => 'lv', + 'ল্ধ' => 'ldh', + 'লখ' => 'lkh', + 'লঘ' => 'lgh', + 'লফ' => 'lph', + 'ল্ক' => 'lk', + 'ল্গ' => 'lg', + 'ল্ট' => 'lT', + 'ল্ড' => 'lD', + 'ল্প' => 'lp', + 'ল্ম' => 'lm', + 'ল্ল' => 'll', + 'ল্ব' => 'lb', + 'ল' => 'l', + 'ম্থ' => 'mth', + 'ম্ফ' => 'mf', + 'ম্ভ' => 'mv', + 'মপ্ল' => 'mpl', + 'ম্ন' => 'mn', + 'ম্প' => 'mp', + 'ম্ম' => 'mm', + 'ম্ল' => 'ml', + 'ম্ব' => 'mb', + 'ম' => 'm', + '০' => '0', + '১' => '1', + '২' => '2', + '৩' => '3', + '৪' => '4', + '৫' => '5', + '৬' => '6', + '৭' => '7', + '৮' => '8', + '৯' => '9', + 'ঙ্ক্ষ' => 'Ngkx', + 'ঞ্ছ' => 'nch', + 'ঙ্ঘ' => 'ngh', + 'ঙ্খ' => 'nkh', + 'ঞ্ঝ' => 'njh', + 'ঙ্গৌ' => 'ngOU', + 'ঙ্গৈ' => 'ngOI', + 'ঞ্চ' => 'nc', + 'ঙ্ক' => 'nk', + 'ঙ্ষ' => 'Ngx', + 'ঙ্গ' => 'ngo', + 'ঙ্ম' => 'Ngm', + 'ঞ্জ' => 'nj', + 'ন্ধ' => 'ndh', + 'ন্ঠ' => 'nTh', + 'ণ্ঠ' => 'NTh', + 'ন্থ' => 'nth', + 'ঙ্গা' => 'nga', + 'ঙ্গি' => 'ngi', + 'ঙ্গী' => 'ngI', + 'ঙ্গু' => 'ngu', + 'ঙ্গূ' => 'ngU', + 'ঙ্গে' => 'nge', + 'ঙ্গো' => 'ngO', + 'ণ্ঢ' => 'NDh', + 'নশ' => 'nsh', + 'ঙর' => 'Ngr', + 'ঞর' => 'NGr', + 'ংর' => 'ngr', + 'ঙ' => 'Ng', + 'ঞ' => 'NG', + 'ং' => 'ng', + 'ন্ন' => 'nn', + 'ণ্ণ' => 'NN', + 'ণ্ন' => 'Nn', + 'ন্ম' => 'nm', + 'ণ্ম' => 'Nm', + 'ন্দ' => 'nd', + 'ন্ট' => 'nT', + 'ণ্ট' => 'NT', + 'ন্ড' => 'nD', + 'ণ্ড' => 'ND', + 'ন্ত' => 'nt', + 'ন্স' => 'ns', + 'ন' => 'n', + 'ণ' => 'N', + 'ৈ' => 'OI', + 'ৌ' => 'OU', + 'ো' => 'O', + 'ঐ' => 'OI', + 'ঔ' => 'OU', + 'অ' => 'o', + 'ও' => 'oo', + 'ফ্ল' => 'fl', + 'প্ট' => 'pT', + 'প্ত' => 'pt', + 'প্ন' => 'pn', + 'প্প' => 'pp', + 'প্ল' => 'pl', + 'প্স' => 'ps', + 'ফ' => 'f', + 'প' => 'p', + 'ৃ' => 'rri', + 'ঋ' => 'rri', + 'রর্য' => 'rry', + '্র্য' => 'ry', + '্রর' => 'rr', + 'ড়্গ' => 'Rg', + 'ঢ়' => 'Rh', + 'ড়' => 'R', + 'র' => 'r', + '্র' => 'r', + 'শ্ছ' => 'Sch', + 'ষ্ঠ' => 'ShTh', + 'ষ্ফ' => 'Shf', + 'স্ক্ল' => 'skl', + 'স্খ' => 'skh', + 'স্থ' => 'sth', + 'স্ফ' => 'sf', + 'শ্চ' => 'Sc', + 'শ্ত' => 'St', + 'শ্ন' => 'Sn', + 'শ্ম' => 'Sm', + 'শ্ল' => 'Sl', + 'ষ্ক' => 'Shk', + 'ষ্ট' => 'ShT', + 'ষ্ণ' => 'ShN', + 'ষ্প' => 'Shp', + 'ষ্ম' => 'Shm', + 'স্প্ল' => 'spl', + 'স্ক' => 'sk', + 'স্ট' => 'sT', + 'স্ত' => 'st', + 'স্ন' => 'sn', + 'স্প' => 'sp', + 'স্ম' => 'sm', + 'স্ল' => 'sl', + 'শ' => 'S', + 'ষ' => 'Sh', + 'স' => 's', + 'ু' => 'u', + 'উ' => 'u', + 'অ্য' => 'oZ', + 'ত্থ' => 'tth', + 'ৎ' => 'tt', + 'ট্ট' => 'TT', + 'ট্ম' => 'Tm', + 'ঠ' => 'Th', + 'ত্ন' => 'tn', + 'ত্ম' => 'tm', + 'থ' => 'th', + 'ত্ত' => 'tt', + 'ট' => 'T', + 'ত' => 't', + 'অ্যা' => 'AZ', + 'া' => 'a', + 'আ' => 'a', + 'য়া' => 'ya', + 'য়' => 'y', + 'ি' => 'i', + 'ই' => 'i', + 'ী' => 'ee', + 'ঈ' => 'ee', + 'ূ' => 'uu', + 'ঊ' => 'uu', + 'ে' => 'e', + 'এ' => 'e', + 'য' => 'z', + '্য' => 'Z', + 'ইয়' => 'y', + 'ওয়' => 'w', + '্ব' => 'w', + 'এক্স' => 'x', + 'ঃ' => ':', + 'ঁ' => 'nn', + '্' => '', + ], + // English + 'en' => [ + ], + // Latin (+ Cyrillic ?) chars + // + // -> Mix of languages, but we need to keep this here, so that different languages can handle there own behavior. + 'latin' => [ + '˚' => '0', + '¹' => '1', + '²' => '2', + '³' => '3', + '⁴' => '4', + '⁵' => '5', + '⁶' => '6', + '⁷' => '7', + '⁸' => '8', + '⁹' => '9', + '₀' => '0', + '₁' => '1', + '₂' => '2', + '₃' => '3', + '₄' => '4', + '₅' => '5', + '₆' => '6', + '₇' => '7', + '₈' => '8', + '₉' => '9', + '௦' => '0', + '௧' => '1', + '௨' => '2', + '௩' => '3', + '௪' => '4', + '௫' => '5', + '௬' => '6', + '௭' => '7', + '௮' => '8', + '௯' => '9', + '௰' => '10', + '௱' => '100', + '௲' => '1000', + 'Ꜳ' => 'AA', + 'ꜳ' => 'aa', + 'Æ' => 'AE', + 'æ' => 'ae', + 'Ǽ' => 'AE', + 'ǽ' => 'ae', + 'Ꜵ' => 'AO', + 'ꜵ' => 'ao', + 'Ꜷ' => 'AU', + 'ꜷ' => 'au', + 'Ꜹ' => 'AV', + 'ꜹ' => 'av', + 'Ꜻ' => 'av', + 'ꜻ' => 'av', + 'Ꜽ' => 'AY', + 'ꜽ' => 'ay', + 'ȸ' => 'db', + 'ʣ' => 'dz', + 'ʥ' => 'dz', + 'ʤ' => 'dezh', + '🙰' => 'et', + 'ff' => 'ff', + 'ffi' => 'ffi', + 'ffl' => 'ffl', + 'fi' => 'fi', + 'fl' => 'fl', + 'ʩ' => 'feng', + 'IJ' => 'IJ', + 'ij' => 'ij', + 'ʪ' => 'ls', + 'ʫ' => 'lz', + 'ɮ' => 'lezh', + 'ȹ' => 'qp', + 'ʨ' => 'tc', + 'ʦ' => 'ts', + 'ʧ' => 'tesh', + 'Œ' => 'OE', + 'œ' => 'oe', + 'Ꝏ' => 'OO', + 'ꝏ' => 'oo', + 'ẞ' => 'SS', + 'ß' => 'ss', + 'st' => 'st', + 'ſt' => 'st', + 'Ꜩ' => 'TZ', + 'ꜩ' => 'tz', + 'ᵫ' => 'ue', + 'Aι' => 'Ai', + 'αι' => 'ai', + 'Ει' => 'Ei', + 'ει' => 'ei', + 'Οι' => 'Oi', + 'οι' => 'oi', + 'Ου' => 'Oy', + 'ου' => 'oy', + 'Υι' => 'Yi', + 'υι' => 'yi', + 'ἀ' => 'a', + 'ἁ' => 'a', + 'ἂ' => 'a', + 'ἃ' => 'a', + 'ἄ' => 'a', + 'ἅ' => 'a', + 'ἆ' => 'a', + 'ἇ' => 'a', + 'Ἀ' => 'A', + 'Ἁ' => 'A', + 'Ἂ' => 'A', + 'Ἃ' => 'A', + 'Ἄ' => 'A', + 'Ἅ' => 'A', + 'Ἆ' => 'A', + 'Ἇ' => 'A', + 'ᾰ' => 'a', + 'ᾱ' => 'a', + 'ᾲ' => 'a', + 'ᾳ' => 'a', + 'ᾴ' => 'a', + 'ᾶ' => 'a', + 'ᾷ' => 'a', + 'Ᾰ' => 'A', + 'Ᾱ' => 'A', + 'Ὰ' => 'A', + 'Ά' => 'A', + 'ᾼ' => 'A', + 'Ä' => 'A', + 'ä' => 'a', + 'À' => 'A', + 'à' => 'a', + 'Á' => 'A', + 'á' => 'a', + 'Â' => 'A', + 'â' => 'a', + 'Ã' => 'A', + 'ã' => 'a', + 'A̧' => 'A', + 'a̧' => 'a', + 'Ą' => 'A', + 'ą' => 'a', + 'Ⱥ' => 'A', + 'ⱥ' => 'a', + 'Å' => 'A', + 'å' => 'a', + 'Ǻ' => 'A', + 'ǻ' => 'a', + 'Ă' => 'A', + 'ă' => 'a', + 'Ǎ' => 'A', + 'ǎ' => 'a', + 'Ȧ' => 'A', + 'ȧ' => 'a', + 'Ạ' => 'A', + 'ạ' => 'a', + 'Ā' => 'A', + 'ā' => 'a', + 'ª' => 'a', + 'Ɓ' => 'B', + 'Ѣ' => 'E', + 'ѣ' => 'e', + 'Ç' => 'C', + 'ç' => 'c', + 'Ĉ' => 'C', + 'ĉ' => 'c', + 'C̈' => 'C', + 'c̈' => 'c', + 'C̨' => 'C', + 'c̨' => 'c', + 'Ȼ' => 'C', + 'ȼ' => 'c', + 'Č' => 'C', + 'č' => 'c', + 'Ć' => 'C', + 'ć' => 'c', + 'C̀' => 'C', + 'c̀' => 'c', + 'Ċ' => 'C', + 'ċ' => 'c', + 'C̣' => 'C', + 'c̣' => 'c', + 'C̄' => 'C', + 'c̄' => 'c', + 'C̃' => 'C', + 'c̃' => 'c', + 'Ð' => 'D', + 'Đ' => 'D', + 'ð' => 'd', + 'đ' => 'd', + 'È' => 'E', + 'É' => 'E', + 'Ê' => 'E', + 'Ë' => 'E', + 'Ĕ' => 'E', + 'Ė' => 'E', + 'Ȩ' => 'E', + 'ȩ' => 'e', + 'Ę' => 'E', + 'ę' => 'e', + 'Ɇ' => 'E', + 'ɇ' => 'e', + 'Ě' => 'E', + 'ě' => 'e', + 'Ẹ' => 'E', + 'ẹ' => 'e', + 'Ē' => 'E', + 'ē' => 'e', + 'Ẽ' => 'E', + 'ẽ' => 'e', + 'è' => 'e', + 'é' => 'e', + 'ê' => 'e', + 'ë' => 'e', + 'ĕ' => 'e', + 'ė' => 'e', + 'ƒ' => 'f', + 'Ѳ' => 'F', + 'ѳ' => 'f', + 'Ĝ' => 'G', + 'Ġ' => 'G', + 'ĝ' => 'g', + 'ġ' => 'g', + 'Ĥ' => 'H', + 'Ħ' => 'H', + 'ĥ' => 'h', + 'ħ' => 'h', + 'Ì' => 'I', + 'Í' => 'I', + 'Î' => 'I', + 'Ï' => 'I', + 'Ĩ' => 'I', + 'Ĭ' => 'I', + 'Ǐ' => 'I', + 'Į' => 'I', + 'ì' => 'i', + 'í' => 'i', + 'î' => 'i', + 'ï' => 'i', + 'ĩ' => 'i', + 'ĭ' => 'i', + 'ǐ' => 'i', + 'į' => 'i', + 'І' => 'I', + 'і' => 'i', + 'I̧' => 'I', + 'i̧' => 'i', + 'Ɨ' => 'I', + 'ɨ' => 'i', + 'İ' => 'I', + 'i' => 'i', + 'Ị' => 'I', + 'ị' => 'i', + 'Ī' => 'I', + 'ī' => 'i', + 'Ĵ' => 'J', + 'ĵ' => 'j', + 'J́́' => 'J', + 'j́' => 'j', + 'J̀̀' => 'J', + 'j̀' => 'j', + 'J̈' => 'J', + 'j̈' => 'j', + 'J̧' => 'J', + 'j̧' => 'j', + 'J̨' => 'J', + 'j̨' => 'j', + 'Ɉ' => 'J', + 'ɉ' => 'j', + 'J̌' => 'J', + 'ǰ' => 'j', + 'J̇' => 'J', + 'j' => 'j', + 'J̣' => 'J', + 'j̣' => 'j', + 'J̄' => 'J', + 'j̄' => 'j', + 'J̃' => 'J', + 'j̃' => 'j', + 'ĸ' => 'k', + 'Ĺ' => 'L', + 'Ľ' => 'L', + 'Ŀ' => 'L', + 'ĺ' => 'l', + 'ľ' => 'l', + 'ŀ' => 'l', + 'L̀' => 'L', + 'l̀' => 'l', + 'L̂' => 'L', + 'l̂' => 'l', + 'L̈' => 'L', + 'l̈' => 'l', + 'Ļ' => 'L', + 'ļ' => 'l', + 'L̨' => 'L', + 'l̨' => 'l', + 'Ł' => 'L', + 'ł' => 'l', + 'Ƚ' => 'L', + 'ƚ' => 'l', + 'L̇' => 'L', + 'l̇' => 'l', + 'Ḷ' => 'L', + 'ḷ' => 'l', + 'L̄' => 'L', + 'l̄' => 'l', + 'L̃' => 'L', + 'l̃' => 'l', + 'Ñ' => 'N', + 'ñ' => 'n', + 'Ŋ' => 'N', + 'ŋ' => 'n', + 'ʼn' => 'n', + 'Ń' => 'N', + 'ń' => 'n', + 'Ǹ' => 'N', + 'ǹ' => 'n', + 'N̂' => 'N', + 'n̂' => 'n', + 'N̈' => 'N', + 'n̈' => 'n', + 'Ņ' => 'N', + 'ņ' => 'n', + 'N̨' => 'N', + 'n̨' => 'n', + 'Ꞥ' => 'N', + 'ꞥ' => 'n', + 'Ň' => 'N', + 'ň' => 'n', + 'Ṅ' => 'N', + 'ṅ' => 'n', + 'Ṇ' => 'N', + 'ṇ' => 'n', + 'N̄' => 'N', + 'n̄' => 'n', + 'Ö' => 'O', + 'Ò' => 'O', + 'Ó' => 'O', + 'Ô' => 'O', + 'Õ' => 'O', + 'Ō' => 'O', + 'Ŏ' => 'O', + 'Ǒ' => 'O', + 'Ő' => 'O', + 'Ơ' => 'O', + 'Ø' => 'O', + 'Ǿ' => 'O', + 'ö' => 'o', + 'ò' => 'o', + 'ó' => 'o', + 'ô' => 'o', + 'õ' => 'o', + 'ō' => 'o', + 'ŏ' => 'o', + 'ǒ' => 'o', + 'ő' => 'o', + 'ơ' => 'o', + 'ø' => 'o', + 'ǿ' => 'o', + 'º' => 'o', + 'O̧' => 'O', + 'o̧' => 'o', + 'Ǫ' => 'O', + 'ǫ' => 'o', + 'Ɵ' => 'O', + 'ɵ' => 'o', + 'Ȯ' => 'O', + 'ȯ' => 'o', + 'Ọ' => 'O', + 'ọ' => 'o', + 'Ŕ' => 'R', + 'Ŗ' => 'R', + 'ŕ' => 'r', + 'ŗ' => 'r', + 'Ŝ' => 'S', + 'Ș' => 'S', + 'ș' => 's', + 'Ś' => 'S', + 'ś' => 's', + 'S̀' => 'S', + 's̀' => 's', + 'Ŝ̀' => 'S', + 'ŝ' => 's', + 'S̈' => 'S', + 's̈' => 's', + 'Ş' => 'S', + 'ş' => 's', + 'S̨' => 'S', + 's̨' => 's', + 'Ꞩ' => 'S', + 'ꞩ' => 's', + 'Š' => 'S', + 'š' => 's', + 'Ṡ' => 'S', + 'ṡ' => 's', + 'Ṣ' => 'S', + 'ṣ' => 's', + 'S̄' => 'S', + 's̄' => 's', + 'S̃' => 'S', + 's̃' => 's', + 'ſ' => 's', + 'Ţ' => 'T', + 'Ț' => 'T', + 'Ŧ' => 'T', + 'Þ' => 'TH', + 'ţ' => 't', + 'ț' => 't', + 'ŧ' => 't', + 'þ' => 'th', + 'T́' => 'T', + 't́' => 't', + 'T̀' => 'T', + 't̀' => 't', + 'T̂' => 'T', + 't̂' => 't', + 'T̈' => 'T', + 'ẗ' => 't', + 'T̨' => 'T', + 't̨' => 't', + 'Ⱦ' => 'T', + 'ⱦ' => 't', + 'Ť' => 'T', + 'ť' => 't', + 'Ṫ' => 'T', + 'ṫ' => 't', + 'Ṭ' => 'T', + 'ṭ' => 't', + 'T̄' => 'T', + 't̄' => 't', + 'T̃' => 'T', + 't̃' => 't', + 'Ü' => 'U', + 'Ù' => 'U', + 'Ú' => 'U', + 'Û' => 'U', + 'Ũ' => 'U', + 'Ŭ' => 'U', + 'Ű' => 'U', + 'Ų' => 'U', + 'Ư' => 'U', + 'Ǔ' => 'U', + 'Ǖ' => 'U', + 'Ǘ' => 'U', + 'Ǚ' => 'U', + 'Ǜ' => 'U', + 'ü' => 'u', + 'ù' => 'u', + 'ú' => 'u', + 'û' => 'u', + 'ũ' => 'u', + 'ŭ' => 'u', + 'ű' => 'u', + 'ų' => 'u', + 'ư' => 'u', + 'ǔ' => 'u', + 'ǖ' => 'u', + 'ǘ' => 'u', + 'ǚ' => 'u', + 'ǜ' => 'u', + 'U̧' => 'U', + 'u̧' => 'u', + 'Ʉ' => 'U', + 'ʉ' => 'u', + 'U̇' => 'U', + 'u̇' => 'u', + 'Ụ' => 'U', + 'ụ' => 'u', + 'Ū' => 'U', + 'ū' => 'u', + 'Ʊ' => 'U', + 'ʊ' => 'u', + 'Ŵ' => 'W', + 'ŵ' => 'w', + 'Ẁ' => 'W', + 'ẁ' => 'w', + 'Ẃ' => 'W', + 'ẃ' => 'w', + 'Ẅ' => 'W', + 'ẅ' => 'w', + 'Ѵ' => 'I', + 'ѵ' => 'i', + 'Ꙗ' => 'Ja', + 'ꙗ' => 'ja', + 'Є' => 'Je', + 'є' => 'je', + 'Ѥ' => 'Je', + 'ѥ' => 'je', + 'Ѕ' => 'Dz', + 'ѕ' => 'dz', + 'Ꙋ' => 'U', + 'ꙋ' => 'u', + 'Ѡ' => 'O', + 'ѡ' => 'o', + 'Ѿ' => 'Ot', + 'ѿ' => 'ot', + 'Ѫ' => 'U', + 'ѫ' => 'u', + 'Ѧ' => 'Ja', + 'ѧ' => 'ja', + 'Ѭ' => 'Ju', + 'ѭ' => 'ju', + 'Ѩ' => 'Ja', + 'ѩ' => 'Ja', + 'Ѯ' => 'Ks', + 'ѯ' => 'ks', + 'Ѱ' => 'Ps', + 'ѱ' => 'ps', + 'Х' => 'X', + 'х' => 'x', + 'Ý' => 'Y', + 'Ÿ' => 'Y', + 'Ŷ' => 'Y', + 'ý' => 'y', + 'ÿ' => 'y', + 'ŷ' => 'y', + 'Ỳ' => 'Y', + 'ỳ' => 'y', + 'Y̧' => 'Y', + 'y̧' => 'y', + 'Y̨' => 'Y', + 'y̨' => 'y', + 'Ɏ' => 'Y', + 'ɏ' => 'y', + 'Y̌' => 'Y', + 'y̌' => 'y', + 'Ẏ' => 'Y', + 'ẏ' => 'y', + 'Ỵ' => 'Y', + 'ỵ' => 'y', + 'Ȳ' => 'Y', + 'ȳ' => 'y', + 'Ỹ' => 'Y', + 'ỹ' => 'y', + 'Ź' => 'Z', + 'ź' => 'z', + 'Z̀' => 'Z', + 'z̀' => 'z', + 'Ẑ' => 'Z', + 'ẑ' => 'z', + 'Z̈' => 'Z', + 'z̈' => 'z', + 'Z̧' => 'Z', + 'z̧' => 'z', + 'Z̨' => 'Z', + 'z̨' => 'z', + 'Ƶ' => 'Z', + 'ƶ' => 'z', + 'Ž' => 'Z', + 'ž' => 'z', + 'Ż' => 'Z', + 'ż' => 'z', + 'Ẓ' => 'Z', + 'ẓ' => 'z', + 'Z̄' => 'Z', + 'z̄' => 'z', + 'Z̃' => 'Z', + 'z̃' => 'z', + ], + // whitespace chars + ' ' => [ + "\xc2\xa0" => ' ', // 'NO-BREAK SPACE' + "\xe1\x9a\x80" => ' ', // 'OGHAM SPACE MARK' + "\xe2\x80\x80" => ' ', // 'EN QUAD' + "\xe2\x80\x81" => ' ', // 'EM QUAD' + "\xe2\x80\x82" => ' ', // 'EN SPACE' + "\xe2\x80\x83" => ' ', // 'EM SPACE' + "\xe2\x80\x84" => ' ', // 'THREE-PER-EM SPACE' + "\xe2\x80\x85" => ' ', // 'FOUR-PER-EM SPACE' + "\xe2\x80\x86" => ' ', // 'SIX-PER-EM SPACE' + "\xe2\x80\x87" => ' ', // 'FIGURE SPACE' + "\xe2\x80\x88" => ' ', // 'PUNCTUATION SPACE' + "\xe2\x80\x89" => ' ', // 'THIN SPACE' + "\xe2\x80\x8a" => ' ', // 'HAIR SPACE' + "\xe2\x80\xa8" => ' ', // 'LINE SEPARATOR' + "\xe2\x80\xa9" => ' ', // 'PARAGRAPH SEPARATOR' + "\xe2\x80\x8b" => ' ', // 'ZERO WIDTH SPACE' + "\xe2\x80\xaf" => ' ', // 'NARROW NO-BREAK SPACE' + "\xe2\x81\x9f" => ' ', // 'MEDIUM MATHEMATICAL SPACE' + "\xe3\x80\x80" => ' ', // 'IDEOGRAPHIC SPACE' + "\xef\xbe\xa0" => ' ', // 'HALFWIDTH HANGUL FILLER' + ], + // commonly used in Word documents + 'msword' => [ + "\xc2\xab" => '<<', // « (U+00AB) in UTF-8 + "\xc2\xbb" => '>>', // » (U+00BB) in UTF-8 + "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8 + "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8 + "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8 + "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8 + "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8 + "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8 + "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8 + "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8 + "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8 + "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8 + "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8 + "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8 + "\xe2\x80\xa6" => '...', // … (U+2026) in UTF-8 + ], + // Currency + // + // url => https://en.wikipedia.org/wiki/Currency_symbol + 'currency_short' => [ + '€' => 'EUR', + '$' => '$', + '₢' => 'Cr', + '₣' => 'Fr.', + '£' => 'PS', + '₤' => 'L.', + 'ℳ' => 'M', + '₥' => 'mil', + '₦' => 'N', + '₧' => 'Pts', + '₨' => 'Rs', + 'රු' => 'LKR', + 'ரூ' => 'LKR', + '௹' => 'Rs', + 'रू' => 'NPR', + '₹' => 'Rs', + '૱' => 'Rs', + '₩' => 'W', + '₪' => 'NS', + '₸' => 'KZT', + '₫' => 'D', + '֏' => 'AMD', + '₭' => 'K', + '₺' => 'TL', + '₼' => 'AZN', + '₮' => 'T', + '₯' => 'Dr', + '₲' => 'PYG', + '₾' => 'GEL', + '₳' => 'ARA', + '₴' => 'UAH', + '₽' => 'RUB', + '₵' => 'GHS', + '₡' => 'CL', + '¢' => 'c', + '¥' => 'YEN', + '円' => 'JPY', + '৳' => 'BDT', + '元' => 'CNY', + '﷼' => 'SAR', + '៛' => 'KR', + '₠' => 'ECU', + '¤' => '$?', + '฿' => 'THB', + '؋' => 'AFN', + ], +]; |