From 9cab8ae58a29ecf7387e6865aa170715caeabf04 Mon Sep 17 00:00:00 2001 From: Stefan Parviainen Date: Tue, 30 Dec 2014 19:57:12 +0100 Subject: Language names via intl library. Fixes #773 --- library/intl/scripts/country/generate.php | 162 ++++++++++++++++++++++++ library/intl/scripts/currency/generate.php | 153 ++++++++++++++++++++++ library/intl/scripts/language/generate.php | 129 +++++++++++++++++++ library/intl/scripts/number_format/generate.php | 107 ++++++++++++++++ 4 files changed, 551 insertions(+) create mode 100644 library/intl/scripts/country/generate.php create mode 100644 library/intl/scripts/currency/generate.php create mode 100644 library/intl/scripts/language/generate.php create mode 100644 library/intl/scripts/number_format/generate.php (limited to 'library/intl/scripts') diff --git a/library/intl/scripts/country/generate.php b/library/intl/scripts/country/generate.php new file mode 100644 index 000000000..879dc0a25 --- /dev/null +++ b/library/intl/scripts/country/generate.php @@ -0,0 +1,162 @@ + $countryName) { + if (is_numeric($countryCode) || in_array($countryCode, $ignoredCountries)) { + // Ignore continents, regions, uninhabited islands. + continue; + } + if (strpos($countryCode, '-alt-') !== FALSE) { + // Ignore alternative names. + continue; + } + + $baseData[$countryCode]['code'] = $countryCode; + // Countries are not guaranteed to have an alpha3 and/or numeric code. + if (isset($codeMappings[$countryCode]['_alpha3'])) { + $baseData[$countryCode]['three_letter_code'] = $codeMappings[$countryCode]['_alpha3']; + } + if (isset($codeMappings[$countryCode]['_numeric'])) { + $baseData[$countryCode]['numeric_code'] = $codeMappings[$countryCode]['_numeric']; + } + + // Determine the telephone code for this country. + if (in_array($countryCode, array('IC', 'EA'))) { + // "Canary Islands" and "Ceuta and Melilla" use Spain's. 
+ $baseData[$countryCode]['telephone_code'] = $telephoneCodeData['ES'][0]['telephoneCountryCode']; + } elseif ($countryCode == 'XK') { + // Kosovo uses three telephone codes. Use Serbia's until that gets resolved. + $baseData[$countryCode]['telephone_code'] = $telephoneCodeData['RS'][0]['telephoneCountryCode']; + } elseif (isset($telephoneCodeData[$countryCode])) { + $baseData[$countryCode]['telephone_code'] = $telephoneCodeData[$countryCode][0]['telephoneCountryCode']; + } +} + +// Write out base.json. +ksort($baseData); +$json = json_encode($baseData, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); +file_put_contents('base.json', $json); + +// Gather available locales. +$locales = array(); +if ($handle = opendir('../json-full/main')) { + while (false !== ($entry = readdir($handle))) { + if (substr($entry, 0, 1) != '.') { + $entryParts = explode('-', $entry); + if (!in_array($entry, $ignoredLocales) && !in_array($entryParts[0], $ignoredLocales)) { + $locales[] = $entry; + } + } + } + closedir($handle); +} + +// Create the localizations. +$countries = array(); +foreach ($locales as $locale) { + $data = json_decode(file_get_contents('../json-full/main/' . $locale . '/territories.json'), true); + $data = $data['main'][$locale]['localeDisplayNames']['territories']; + foreach ($data as $countryCode => $countryName) { + if (isset($baseData[$countryCode])) { + // This country name is untranslated, use the english version. + if ($countryCode == $countryName) { + $countryName = $countryData[$countryCode]; + } + + $countries[$locale][$countryCode] = array( + 'name' => $countryName, + ); + } + } +} + +// Identify localizations that are the same as the ones for the parent locale. +// For example, "fr-FR" if "fr" has the same data. 
+$duplicates = array(); +foreach ($countries as $locale => $localizedCountries) { + if (strpos($locale, '-') !== FALSE) { + $localeParts = explode('-', $locale); + array_pop($localeParts); + $parentLocale = implode('-', $localeParts); + $diff = array_udiff($localizedCountries, $countries[$parentLocale], function ($first, $second) { + return ($first['name'] == $second['name']) ? 0 : 1; + }); + + if (empty($diff)) { + // The duplicates are not removed right away because they might + // still be needed for other duplicate checks (for example, + // when there are locales like bs-Latn-BA, bs-Latn, bs). + $duplicates[] = $locale; + } + } +} +// Remove the duplicates. +foreach ($duplicates as $locale) { + unset($countries[$locale]); +} + +// Write out the localizations. +foreach ($countries as $locale => $localizedCountries) { + $collator = collator_create($locale); + uasort($localizedCountries, function($a, $b) use ($collator) { + return collator_compare($collator, $a['name'], $b['name']); + }); + + $json = json_encode($localizedCountries, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); + file_put_contents($locale . '.json', $json); +} diff --git a/library/intl/scripts/currency/generate.php b/library/intl/scripts/currency/generate.php new file mode 100644 index 000000000..723906f7a --- /dev/null +++ b/library/intl/scripts/currency/generate.php @@ -0,0 +1,153 @@ +CcyTbl->CcyNtry as $currency) { + $attributes = (array) $currency->CcyNm->attributes(); + if (!empty($attributes) && !empty($attributes['@attributes']['IsFund'])) { + // Ignore funds. + continue; + } + $currency = (array) $currency; + if (empty($currency['Ccy'])) { + // Ignore placeholders like "Antarctica". + continue; + } + if (substr($currency['CtryNm'], 0, 2) == 'ZZ' || in_array($currency['Ccy'], array('XUA', 'XSU', 'XDR'))) { + // Ignore special currencies. 
+ continue; + } + + $currencyCode = $currency['Ccy']; + $baseData[$currencyCode] = array( + 'code' => $currencyCode, + 'numeric_code' => $currency['CcyNbr'], + ); + // Take the fraction digits from CLDR, not ISO, because it reflects real + // life usage more closely. If the digits aren't set, that means that the + // default value (2) should be used. + if (isset($currencyData[$currencyCode]['_digits'])) { + $fractionDigits = $currencyData[$currencyCode]['_digits']; + if ($fractionDigits != 2) { + $baseData[$currencyCode]['fraction_digits'] = $fractionDigits; + } + } +} + +// Write out base.json. +ksort($baseData); +$json = json_encode($baseData, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); +file_put_contents('base.json', $json); + +// Gather available locales. +$locales = array(); +if ($handle = opendir('../json-full/main')) { + while (false !== ($entry = readdir($handle))) { + if (substr($entry, 0, 1) != '.') { + $entryParts = explode('-', $entry); + if (!in_array($entry, $ignoredLocales) && !in_array($entryParts[0], $ignoredLocales)) { + $locales[] = $entry; + } + } + } + closedir($handle); +} + +// Create the localizations. +$currencies = array(); +foreach ($locales as $locale) { + $data = json_decode(file_get_contents('../json-full/main/' . $locale . '/currencies.json'), true); + $data = $data['main'][$locale]['numbers']['currencies']; + foreach ($data as $currencyCode => $currency) { + if (isset($baseData[$currencyCode])) { + $currencies[$locale][$currencyCode] = array( + 'name' => $currency['displayName'], + 'symbol' => $currency['symbol'], + ); + } + } +} + +// Identify localizations that are the same as the ones for the parent locale. +// For example, "fr-FR" if "fr" has the same data. 
+$duplicates = array(); +foreach ($currencies as $locale => $localizedCurrencies) { + if (strpos($locale, '-') !== FALSE) { + $localeParts = explode('-', $locale); + array_pop($localeParts); + $parentLocale = implode('-', $localeParts); + $diff = array_udiff($localizedCurrencies, $currencies[$parentLocale], function ($first, $second) { + return ($first['name'] == $second['name']) ? 0 : 1; + }); + + if (empty($diff)) { + // The duplicates are not removed right away because they might + // still be needed for other duplicate checks (for example, + // when there are locales like bs-Latn-BA, bs-Latn, bs). + $duplicates[] = $locale; + } + } +} +// Remove the duplicates. +foreach ($duplicates as $locale) { + unset($currencies[$locale]); +} + +// Write out the localizations. +foreach ($currencies as $locale => $localizedCurrencies) { + $collator = collator_create($locale); + uasort($localizedCurrencies, function($a, $b) use ($collator) { + return collator_compare($collator, $a['name'], $b['name']); + }); + + $json = json_encode($localizedCurrencies, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); + file_put_contents($locale . '.json', $json); +} diff --git a/library/intl/scripts/language/generate.php b/library/intl/scripts/language/generate.php new file mode 100644 index 000000000..e11c8df49 --- /dev/null +++ b/library/intl/scripts/language/generate.php @@ -0,0 +1,129 @@ + $languageName) { + if (strpos($languageCode, '-alt-') === FALSE) { + $languages['en'][$languageCode] = array( + 'code' => $languageCode, + 'name' => $languageName, + ); + } +} + +// Gather available locales. 
+$locales = array(); +if ($handle = opendir('../json-full/main')) { + while (false !== ($entry = readdir($handle))) { + if (substr($entry, 0, 1) != '.') { + $entryParts = explode('-', $entry); + if (!in_array($entry, $ignoredLocales) && !in_array($entryParts[0], $ignoredLocales)) { + $locales[] = $entry; + } + } + } + closedir($handle); +} + +// Remove all languages that aren't an available locale at the same time. +// This reduces the language list from about 515 to about 185 languages. +foreach ($languages['en'] as $languageCode => $languageData) { + if (!in_array($languageCode, $locales)) { + unset($languages['en'][$languageCode]); + } +} + +// Load the localizations. +foreach ($locales as $locale) { + $data = json_decode(file_get_contents('../json-full/main/' . $locale . '/languages.json'), true); + $data = $data['main'][$locale]['localeDisplayNames']['languages']; + foreach ($data as $languageCode => $languageName) { + if (isset($languages['en'][$languageCode])) { + // This language name is untranslated, use the English version. + if ($languageCode == $languageName) { + $languageName = $languages['en'][$languageCode]['name']; + } + + $languages[$locale][$languageCode] = array( + 'code' => $languageCode, + 'name' => $languageName, + ); + } + } +} + +// Identify localizations that are the same as the ones for the parent locale. +// For example, "fr-FR" if "fr" has the same data. +$duplicates = array(); +foreach ($languages as $locale => $localizedLanguages) { + if (strpos($locale, '-') !== FALSE) { + $localeParts = explode('-', $locale); + array_pop($localeParts); + $parentLocale = implode('-', $localeParts); + $diff = array_udiff($localizedLanguages, $languages[$parentLocale], function ($first, $second) { + return ($first['name'] == $second['name']) ? 
0 : 1; + }); + + if (empty($diff)) { + // The duplicates are not removed right away because they might + // still be needed for other duplicate checks (for example, + // when there are locales like bs-Latn-BA, bs-Latn, bs). + $duplicates[] = $locale; + } + } +} +// Remove the duplicates. +foreach ($duplicates as $locale) { + unset($languages[$locale]); +} + +// Write out the localizations. +foreach ($languages as $locale => $localizedLanguages) { + $collator = collator_create($locale); + uasort($localizedLanguages, function($a, $b) use ($collator) { + return collator_compare($collator, $a['name'], $b['name']); + }); + + $json = json_encode($localizedLanguages, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); + file_put_contents($locale . '.json', $json); +} diff --git a/library/intl/scripts/number_format/generate.php b/library/intl/scripts/number_format/generate.php new file mode 100644 index 000000000..4308b4dc7 --- /dev/null +++ b/library/intl/scripts/number_format/generate.php @@ -0,0 +1,107 @@ + $numberingSystem, + 'decimal_pattern' => $data['decimalFormats-numberSystem-' . $numberingSystem]['standard'], + 'percent_pattern' => $data['percentFormats-numberSystem-' . $numberingSystem]['standard'], + 'currency_pattern' => $data['currencyFormats-numberSystem-' . $numberingSystem]['standard'], + 'accounting_currency_pattern' => $data['currencyFormats-numberSystem-' . $numberingSystem]['accounting'], + ); + + // Add the symbols only if they're different from the default data. + $decimalSeparator = $data['symbols-numberSystem-' . $numberingSystem]['decimal']; + $groupingSeparator = $data['symbols-numberSystem-' . $numberingSystem]['group']; + $plusSign = $data['symbols-numberSystem-' . $numberingSystem]['plusSign']; + $minusSign = $data['symbols-numberSystem-' . $numberingSystem]['minusSign']; + $percentSign = $data['symbols-numberSystem-' . 
$numberingSystem]['percentSign']; + if ($decimalSeparator != '.') { + $numberFormats[$locale]['decimal_separator'] = $decimalSeparator; + } + if ($groupingSeparator != ',') { + $numberFormats[$locale]['grouping_separator'] = $groupingSeparator; + } + if ($plusSign != '+') { + $numberFormats[$locale]['plus_sign'] = $plusSign; + } + if ($minusSign != '-') { + $numberFormats[$locale]['minus_sign'] = $minusSign; + } + if ($percentSign != '%') { + $numberFormats[$locale]['percent_sign'] = $percentSign; + } +} + +// Identify localizations that are the same as the ones for the parent locale. +// For example, "fr-FR" if "fr" has the same data. +$duplicates = array(); +foreach ($numberFormats as $locale => $formatData) { + if (strpos($locale, '-') !== FALSE) { + $localeParts = explode('-', $locale); + array_pop($localeParts); + $parentLocale = implode('-', $localeParts); + $diff = array_diff_assoc($formatData, $numberFormats[$parentLocale]); + + if (empty($diff)) { + // The duplicates are not removed right away because they might + // still be needed for other duplicate checks (for example, + // when there are locales like bs-Latn-BA, bs-Latn, bs). + $duplicates[] = $locale; + } + } +} +// Remove the duplicates. +foreach ($duplicates as $locale) { + unset($numberFormats[$locale]); +} + +// Write out the data. +foreach ($numberFormats as $locale => $numberFormat) { + $json = json_encode($numberFormat, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE); + file_put_contents($locale . '.json', $json); +} -- cgit v1.2.3