aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/commerceguys/intl/scripts/generate_number_format_data.php
blob: a9d7d4849e60d552e04605c9e700e8b6634fd2e1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
<?php

/**
 * Generates the json files stored in resources/number_format.
 */

set_time_limit(0);
require __DIR__ . '/../vendor/autoload.php';

// Downloaded from https://github.com/unicode-cldr/cldr-localenames-full.git
$localeDirectory = __DIR__ . '/assets/cldr-localenames-full/main/';
$enLanguages = $localeDirectory . 'en/languages.json';
// Downloaded from https://github.com/unicode-cldr/cldr-numbers-full.git
$numbersDirectory = __DIR__ . '/assets/cldr-numbers-full/main/';

if (!is_dir($localeDirectory)) {
    die("The $localeDirectory directory was not found");
}
if (!is_dir($numbersDirectory)) {
    die("The $numbersDirectory directory was not found");
}
if (!file_exists($enLanguages)) {
    die("The $enLanguages file was not found");
}

$numberFormats = generate_number_formats();
$numberFormats = filter_duplicates($numberFormats);
// We treat 'en' as a generic definition, which allows
// us to strip any data that matches one of its keys.
foreach ($numberFormats as $locale => $numberFormat) {
    if ($locale != 'en') {
        $numberFormats[$locale] = array_diff_assoc($numberFormats[$locale], $numberFormats['en']);
    }
}

// Number formats are stored in PHP, then manually
// transferred to NumberFormatRepository.
$data = "<?php\n\n";
$data .= export_number_formats($numberFormats);
file_put_contents(__DIR__ . '/number_formats.php', $data);

echo "Done.\n";

/**
 * Exports number formats.
 */
function export_number_formats(array $numberFormats)
{
    $indent = '    ';
    $export = '// ' . count($numberFormats) . " available formats: \n";
    $export .= '$numberFormats = [' . "\n";
    foreach ($numberFormats as $locale => $numberFormat) {
        $locale = "'" . $locale . "'";
        $export .= $indent . $locale . " => [\n";
        foreach ($numberFormat as $key => $value) {
            $key = "'" . $key . "'";
            $value = "'" . $value . "'";
            $export .= $indent . $indent . $key . ' => ' . $value . ",\n";
        }
        $export .= "$indent],\n";
    }
    $export .= '];' . "\n\n";
    $export = str_replace("[\n$indent],", '[],', $export);

    return $export;
}

/**
 * Generates the number formats.
 */
function generate_number_formats()
{
    global $numbersDirectory;

    $numberFormats = [];
    foreach (discover_locales() as $locale) {
        $data = json_decode(file_get_contents($numbersDirectory . $locale . '/numbers.json'), true);
        $data = $data['main'][$locale]['numbers'];
        // Use the default numbering system, if it's supported.
        if (in_array($data['defaultNumberingSystem'], ['arab', 'arabext', 'beng', 'deva', 'latn'])) {
            $numberingSystem = $data['defaultNumberingSystem'];
        } else {
            $numberingSystem = 'latn';
        }

        $patterns = [
            'decimal' => $data['decimalFormats-numberSystem-' . $numberingSystem]['standard'],
            'percent' =>  $data['percentFormats-numberSystem-' . $numberingSystem]['standard'],
            'currency' => $data['currencyFormats-numberSystem-' . $numberingSystem]['standard'],
            'accounting' => $data['currencyFormats-numberSystem-' . $numberingSystem]['accounting'],
        ];
        // The "bg" patterns have no '#', confusing the formatter.
        foreach ($patterns as $key => $pattern) {
            if (strpos($pattern, '#') === false) {
                $patterns[$key] = str_replace('0.00', '#0.00', $pattern);
            }
        }

        $numberFormats[$locale] = [
            'numbering_system' => $numberingSystem,
            'decimal_pattern' => $patterns['decimal'],
            'percent_pattern' => $patterns['percent'],
            'currency_pattern' => $patterns['currency'],
            'accounting_currency_pattern' => $patterns['accounting'],
        ];
        // No need to export 'latn' since that is the default value.
        if ($numberFormats[$locale]['numbering_system'] != 'latn') {
            $numberFormats[$locale]['numbering_system'] = $numberingSystem;
        }

        // Add the symbols only if they're different from the default data.
        $decimalSeparator = $data['symbols-numberSystem-' . $numberingSystem]['decimal'];
        $groupingSeparator = $data['symbols-numberSystem-' . $numberingSystem]['group'];
        $plusSign = $data['symbols-numberSystem-' . $numberingSystem]['plusSign'];
        $minusSign = $data['symbols-numberSystem-' . $numberingSystem]['minusSign'];
        $percentSign = $data['symbols-numberSystem-' . $numberingSystem]['percentSign'];
        if ($decimalSeparator != '.') {
            $numberFormats[$locale]['decimal_separator'] = $decimalSeparator;
        }
        if ($groupingSeparator != ',') {
            $numberFormats[$locale]['grouping_separator'] = $groupingSeparator;
        }
        if ($plusSign != '+') {
            $numberFormats[$locale]['plus_sign'] = $plusSign;
        }
        if ($minusSign != '-') {
            $numberFormats[$locale]['minus_sign'] = $minusSign;
        }
        if ($percentSign != '%') {
            $numberFormats[$locale]['percent_sign'] = $percentSign;
        }
    }
    ksort($numberFormats);

    return $numberFormats;
}

/**
 * Filters out duplicate number formats (same as their parent locale).
 *
 * For example, "fr-FR" will be removed if "fr" has the same data.
 */
function filter_duplicates(array $numberFormats)
{
    $duplicates = [];
    foreach ($numberFormats as $locale => $numberFormat) {
        $parentNumberFormat = [];
        $parentLocale = \CommerceGuys\Intl\Locale::getParent($locale);
        if ($parentLocale && isset($numberFormats[$parentLocale])) {
            $parentNumberFormat = $numberFormats[$parentLocale];
        }

        $diff = array_diff_assoc($numberFormat, $parentNumberFormat);
        if (empty($diff)) {
            // The duplicates are not removed right away because they might
            // still be needed for other duplicate checks (for example,
            // when there are locales like bs-Latn-BA, bs-Latn, bs).
            $duplicates[] = $locale;
        }
    }
    // Remove the duplicates.
    foreach ($duplicates as $locale) {
        unset($numberFormats[$locale]);
    }

    return $numberFormats;
}

/**
 * Creates a list of available locales.
 */
function discover_locales()
{
    global $localeDirectory;

    // Locales listed without a "-" match all variants.
    // Locales listed with a "-" match only those exact ones.
    $ignoredLocales = [
        // Interlingua is a made up language.
        'ia',
        // Ignored by other generation scripts, very minor locales.
        'as', 'asa', 'bem', 'ccp', 'chr', 'dav', 'dua', 'ebu', 'ewo', 'guz', 'gv', 'ii',
        'jgo', 'jmc', 'kam', 'kde', 'ki', 'kkj', 'kl', 'kln', 'ksb', 'kw', 'lag',
        'ln', 'mer', 'mgo', 'nd', 'nmg', 'nnh', 'nus', 'os', 'ps', 'rwk', 'sah',
        'saq', 'sbp', 'shi', 'sn', 'teo', 'vai', 'vun', 'xog', 'zgh',
        // Special "grouping" locales.
        'root', 'en-US-POSIX',
    ];

    // Gather available locales.
    $locales = [];
    if ($handle = opendir($localeDirectory)) {
        while (false !== ($entry = readdir($handle))) {
            if (substr($entry, 0, 1) != '.') {
                $entryParts = explode('-', $entry);
                if (!in_array($entry, $ignoredLocales) && !in_array($entryParts[0], $ignoredLocales)) {
                    $locales[] = $entry;
                }
            }
        }
        closedir($handle);
    }

    return $locales;
}