aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/commerceguys/intl/scripts/generate_language_data.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/commerceguys/intl/scripts/generate_language_data.php')
-rw-r--r-- vendor/commerceguys/intl/scripts/generate_language_data.php 217
1 files changed, 217 insertions, 0 deletions
diff --git a/vendor/commerceguys/intl/scripts/generate_language_data.php b/vendor/commerceguys/intl/scripts/generate_language_data.php
new file mode 100644
index 000000000..a7dbd836e
--- /dev/null
+++ b/vendor/commerceguys/intl/scripts/generate_language_data.php
@@ -0,0 +1,217 @@
+<?php
+
+/**
+ * Generates the json files stored in resources/language.
+ *
+ * CLDR lists about 515 languages, many of them dead (like Latin or Old English).
+ * In order to decrease the list to a reasonable size, only the languages
+ * for which CLDR itself has translations are listed.
+ */
+
+set_time_limit(0);
+require __DIR__ . '/../vendor/autoload.php';
+
+// Downloaded from https://github.com/unicode-cldr/cldr-localenames-full.git
+$localeDirectory = __DIR__ . '/assets/cldr-localenames-full/main/';
+$enLanguages = $localeDirectory . 'en/languages.json';
+$outputDirectory = __DIR__ . '/language';
+
+// Reports a fatal problem on stderr and stops with a non-zero exit status.
+// die() with a string message exits with status 0, which makes a failed run
+// indistinguishable from a successful one for shells and CI jobs.
+$abort = function ($message) {
+    file_put_contents('php://stderr', $message . "\n");
+    exit(1);
+};
+
+if (!is_dir($localeDirectory)) {
+    $abort("The $localeDirectory directory was not found");
+}
+if (!file_exists($enLanguages)) {
+    $abort("The $enLanguages file was not found");
+}
+if (!function_exists('collator_create')) {
+    // Reimplementing intl's collator would be a huge undertaking, so we
+    // use it instead to presort the generated locale specific data.
+    $abort('The intl extension was not found.');
+}
+// Make sure we're starting from a clean slate. This is checked before the
+// data is generated so that a leftover directory is reported right away
+// instead of after all the CLDR files have been processed.
+if (is_dir($outputDirectory)) {
+    $abort('The language/ directory must not exist.');
+}
+
+$languages = generate_languages();
+$languages = filter_duplicate_localizations($languages);
+
+// Prepare the filesystem.
+if (!mkdir($outputDirectory)) {
+    $abort('The language/ directory could not be created.');
+}
+
+// Write out the localizations, sorted by name using each locale's own
+// collation rules.
+foreach ($languages as $locale => $localizedLanguages) {
+    $collator = collator_create($locale);
+    if ($collator === null) {
+        $abort("A collator could not be created for the $locale locale.");
+    }
+    uasort($localizedLanguages, function ($a, $b) use ($collator) {
+        return collator_compare($collator, $a, $b);
+    });
+    file_put_json($outputDirectory . '/' . $locale . '.json', $localizedLanguages);
+}
+
+$availableLocales = array_keys($languages);
+sort($availableLocales);
+// Available locales are stored in PHP, then manually
+// transferred to LanguageRepository.
+$data = "<?php\n\n";
+$data .= export_locales($availableLocales);
+if (file_put_contents(__DIR__ . '/language_data.php', $data) === false) {
+    $abort('The language_data.php file could not be written.');
+}
+
+echo "Done.\n";
+
+/**
+ * Converts the provided data into json and writes it to the disk.
+ *
+ * The JSON is pretty-printed, keeps unicode characters unescaped, and is
+ * indented with tabs instead of json_encode()'s 4 spaces per level.
+ *
+ * @param string $filename The path of the file to write.
+ * @param mixed  $data     The data to encode.
+ *
+ * @throws \RuntimeException If the data can't be encoded or the file can't be written.
+ */
+function file_put_json($filename, $data)
+{
+    $json = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
+    if ($json === false) {
+        throw new \RuntimeException("Could not encode the data for $filename: " . json_last_error_msg());
+    }
+    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
+    // Only the indentation at the start of each line is converted: replacing
+    // spaces anywhere else would put raw tabs inside string values such as
+    // "American English" and produce invalid JSON.
+    $json = preg_replace_callback('/^(?: {4})+/m', function ($match) {
+        return str_repeat("\t", (int) (strlen($match[0]) / 4));
+    }, $json);
+
+    if (file_put_contents($filename, $json) === false) {
+        throw new \RuntimeException("Could not write the $filename file");
+    }
+}
+
+/**
+ * Builds the PHP source snippet that declares the available locales.
+ *
+ * The snippet starts with a comment stating how many locales there are,
+ * followed by a $locales array listing each one as a single-quoted string.
+ */
+function export_locales($data)
+{
+    // Each locale becomes a single-quoted PHP string literal.
+    $quoted = [];
+    foreach ($data as $locale) {
+        $quoted[] = "'" . $locale . "'";
+    }
+
+    $header = '// ' . count($quoted) . " available locales. \n";
+    $declaration = '$locales = [' . "\n" . ' ' . implode(', ', $quoted) . "\n" . "];\n";
+
+    return $header . $declaration;
+}
+
+/**
+ * Generates the language lists for each locale.
+ *
+ * Reads "<locale>/languages.json" from the global $localeDirectory for every
+ * discovered locale, keeps only the languages that are available locales
+ * themselves (plus a few explicit overrides), and substitutes the English
+ * name wherever a translation is missing.
+ *
+ * @return array Language names keyed by locale, then by language code.
+ *
+ * @throws \RuntimeException If a languages.json file is unreadable or malformed.
+ */
+function generate_languages()
+{
+    global $localeDirectory;
+
+    $locales = discover_locales();
+    // Make sure 'en' is processed first so that it can be used as a fallback.
+    $index = array_search('en', $locales, true);
+    if ($index !== false) {
+        // Guarded because unset($locales[false]) would silently drop index 0.
+        unset($locales[$index]);
+    }
+    array_unshift($locales, 'en');
+    // The filtering of the language list against the locale list can be
+    // too strict, filtering out languages that should be in the final list.
+    // This override ensures that such cases are covered.
+    $explicitlyAllowed = ['wa'];
+    // Languages that are untranslated in most locales (as of CLDR v34).
+    $explicitlyIgnored = ['ccp', 'fa-AF'];
+
+    // Build the set of accepted language codes once, keyed by code, so the
+    // check inside the nested loops is a constant-time isset() instead of a
+    // linear scan of the locale list for every language of every locale.
+    $acceptedCodes = array_flip($locales) + array_flip($explicitlyAllowed);
+    foreach ($explicitlyIgnored as $ignoredCode) {
+        unset($acceptedCodes[$ignoredCode]);
+    }
+
+    $untranslatedCounts = [];
+    $languages = [];
+    foreach ($locales as $locale) {
+        $filename = $localeDirectory . $locale . '/languages.json';
+        $contents = is_file($filename) ? file_get_contents($filename) : false;
+        $data = ($contents === false) ? null : json_decode($contents, true);
+        if (!isset($data['main'][$locale]['localeDisplayNames']['languages'])
+            || !is_array($data['main'][$locale]['localeDisplayNames']['languages'])) {
+            throw new \RuntimeException("The $filename file is missing or malformed.");
+        }
+        $names = $data['main'][$locale]['localeDisplayNames']['languages'];
+
+        // Start with an empty list so that the locale has an entry even if
+        // none of its languages pass the filter below.
+        $languages[$locale] = [];
+        foreach ($names as $languageCode => $languageName) {
+            // Skip all languages that aren't an available locale at the same time.
+            // This reduces the language list from about 515 to about 185 languages.
+            if (!isset($acceptedCodes[$languageCode])) {
+                continue;
+            }
+
+            // This language name is untranslated, fall back to the English version.
+            if ((string) $languageCode === str_replace('_', '-', $languageName)) {
+                if (isset($languages['en'][$languageCode])) {
+                    $languageName = $languages['en'][$languageCode];
+                }
+                // Maintain a count of untranslated languages per locale.
+                $untranslatedCounts += [$locale => 0];
+                $untranslatedCounts[$locale]++;
+            }
+
+            $languages[$locale][$languageCode] = $languageName;
+        }
+        // CLDR v34 has an uneven language list due to missing translations.
+        // The array union appends the English entries whose codes the locale
+        // doesn't have yet, without touching the ones it already has.
+        if ($locale !== 'en') {
+            $languages[$locale] += $languages['en'];
+        }
+    }
+
+    // Ignore locales that are more than 80% untranslated.
+    foreach ($untranslatedCounts as $locale => $count) {
+        $totalCount = count($languages[$locale]);
+        $untranslatedPercentage = $count * (100 / $totalCount);
+        if ($untranslatedPercentage >= 80) {
+            unset($languages[$locale]);
+        }
+    }
+
+    return $languages;
+}
+
+/**
+ * Filters out duplicate localizations (same as their parent locale).
+ *
+ * For example, "fr-FR" will be removed if "fr" has the same data.
+ *
+ * @param array $localizations Language names keyed by locale, then by language code.
+ *
+ * @return array The localizations without the locales that duplicate their parent.
+ */
+function filter_duplicate_localizations(array $localizations)
+{
+    $duplicates = [];
+    foreach ($localizations as $locale => $localizedLanguages) {
+        $parentLocale = \CommerceGuys\Intl\Locale::getParent($locale);
+        if (!$parentLocale) {
+            continue;
+        }
+        $parentLanguages = isset($localizations[$parentLocale]) ? $localizations[$parentLocale] : [];
+        // Compare code => name pairs. array_udiff() with a callback that only
+        // returns 0 or 1 is not a valid ordering comparator, and it ignores
+        // the keys, so the same names under different codes would be
+        // treated as a duplicate.
+        if (!array_diff_assoc($localizedLanguages, $parentLanguages)) {
+            // The duplicates are not removed right away because they might
+            // still be needed for other duplicate checks (for example,
+            // when there are locales like bs-Latn-BA, bs-Latn, bs).
+            $duplicates[] = $locale;
+        }
+    }
+    foreach ($duplicates as $locale) {
+        unset($localizations[$locale]);
+    }
+
+    return $localizations;
+}
+
+/**
+ * Creates a list of available locales.
+ *
+ * Every non-hidden subdirectory of the global $localeDirectory is treated
+ * as a locale, unless it is on the ignore list below.
+ *
+ * @return string[] The locale names, in alphabetical order.
+ *
+ * @throws \RuntimeException If the locale directory can't be read.
+ */
+function discover_locales()
+{
+    global $localeDirectory;
+
+    // Locales listed without a "-" match all variants.
+    // Locales listed with a "-" match only those exact ones.
+    $ignoredLocales = [
+        // Interlingua is a made up language.
+        'ia',
+        // Valencian differs from its parent only by a single character (è/é).
+        'ca-ES-VALENCIA',
+        // Special "grouping" locales.
+        'root', 'en-US-POSIX',
+    ];
+
+    // scandir() returns the entries sorted alphabetically, which keeps the
+    // result independent of the order in which the filesystem lists them.
+    $entries = scandir($localeDirectory);
+    if ($entries === false) {
+        throw new \RuntimeException("The $localeDirectory directory could not be read.");
+    }
+
+    // Gather available locales.
+    $locales = [];
+    foreach ($entries as $entry) {
+        // Skip ".", ".." and hidden entries.
+        if ($entry[0] === '.') {
+            continue;
+        }
+        // Each locale has its own directory, stray files are not locales.
+        if (!is_dir($localeDirectory . $entry)) {
+            continue;
+        }
+        $entryParts = explode('-', $entry);
+        if (in_array($entry, $ignoredLocales, true) || in_array($entryParts[0], $ignoredLocales, true)) {
+            continue;
+        }
+        $locales[] = $entry;
+    }
+
+    return $locales;
+}