aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/jbroadway/urlify/URLify.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/jbroadway/urlify/URLify.php')
-rw-r--r--vendor/jbroadway/urlify/URLify.php591
1 files changed, 591 insertions, 0 deletions
diff --git a/vendor/jbroadway/urlify/URLify.php b/vendor/jbroadway/urlify/URLify.php
new file mode 100644
index 000000000..be46bd83a
--- /dev/null
+++ b/vendor/jbroadway/urlify/URLify.php
@@ -0,0 +1,591 @@
+<?php
+
+/**
+ * A fast PHP slug generator and transliteration library, started as a PHP port of URLify.js
+ * from the Django project + fallback via "Portable ASCII".
+ *
+ * - https://github.com/django/django/blob/master/django/contrib/admin/static/admin/js/urlify.js
+ * - https://github.com/voku/portable-ascii
+ *
+ * Handles symbols from latin languages, Arabic, Azerbaijani, Bulgarian, Burmese, Croatian, Czech, Danish, Esperanto,
+ * Estonian, Finnish, French, Switzerland (French), Austrian (French), Georgian, German, Switzerland (German),
+ * Austrian (German), Greek, Hindi, Kazakh, Latvian, Lithuanian, Norwegian, Persian, Polish, Romanian, Russian, Swedish,
+ * Serbian, Slovak, Turkish, Ukrainian and Vietnamese ... and many other via "ASCII::to_transliterate()".
+ */
+class URLify
+{
+ /**
+ * Custom character maps that "URLify::downcode()" applies before the generic transliteration.
+ *
+ * The outer key is either the language passed to "URLify::add_chars()" or a generated unique key;
+ * each inner array maps an original string to its replacement.
+ *
+ * ISO 639-1 codes: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+ *
+ * @var array[]
+ */
+ public static $maps = [];
+
+ /**
+ * Words to remove from URLs, grouped by language key.
+ *
+ * Entries added via "URLify::remove_words()" are stored regex-quoted, because
+ * "URLify::filter()" joins the list into a regular expression.
+ *
+ * @var array[]
+ */
+ public static $remove_list = [];
+
+ /**
+ * Regular-expression patterns whose matches are replaced by the separator-char in "URLify::filter()".
+ *
+ * Starts empty; "URLify::filter()" loads the defaults on demand while it is still empty.
+ *
+ * @var string[]
+ */
+ private static $arrayToSeparator = [];
+
+ /**
+  * Register patterns whose matches "URLify::filter()" replaces with the separator.
+  *
+  * The entries are handed to preg_replace() as patterns, so each one must be a
+  * complete regular expression including its delimiters.
+  *
+  * @param array $array <p>Patterns that should be replaced by the separator.</p>
+  * @param bool  $merge <p>
+  *                     true: add the patterns to the current list (duplicates are dropped),
+  *                     false: replace the current list completely.
+  *                     </p>
+  *
+  * @return void
+  *
+  * @psalm-param string[] $array
+  */
+ public static function add_array_to_separator(array $array, bool $merge = true)
+ {
+     // replace mode: the given list becomes the whole configuration
+     if ($merge !== true) {
+         self::$arrayToSeparator = $array;
+
+         return;
+     }
+
+     // merge mode: append to the current list and drop duplicates
+     $combined = \array_merge(self::$arrayToSeparator, $array);
+     self::$arrayToSeparator = \array_unique($combined);
+ }
+
+ /**
+  * Register additional character replacements for "URLify::downcode()". `$map` should be a hash.
+  *
+  * When a map already exists for the language, both maps are combined; for keys that are
+  * present in both, the previously registered replacement is kept. Keys are preserved
+  * exactly as given, so numeric strings such as "1" stay usable as search strings.
+  *
+  * @param array       $map      <p>Hash of "original" => "replacement".</p>
+  * @param string|null $language <p>Key to store the map under; a unique key is generated for null.</p>
+  *
+  * @return void
+  *
+  * @psalm-param array<string, string> $map
+  */
+ public static function add_chars(array $map, string $language = null)
+ {
+     $language_key = $language ?? \uniqid('urlify', true);
+
+     if (!isset(self::$maps[$language_key])) {
+         self::$maps[$language_key] = $map;
+
+         return;
+     }
+
+     // array_replace() keeps the same order and precedence as array_merge() for string keys,
+     // but does not renumber integer-like keys (PHP stores the key "1" as int 1), which
+     // array_merge() would turn into 0, 1, 2, ... and thereby change the search strings.
+     self::$maps[$language_key] = \array_replace($map, self::$maps[$language_key]);
+ }
+
+ /**
+  * Drop every character map that was registered via "URLify::add_chars()".
+  *
+  * @return void
+  */
+ public static function reset_chars()
+ {
+     // forget all custom maps, for every language key
+     self::$maps = [];
+ }
+
+ /**
+  * Transliterates characters to their ASCII equivalents.
+  *
+  * Steps: expand currencies / symbols into words, apply the custom maps from
+  * "URLify::add_chars()", then run the result through "ASCII::to_ascii()" and
+  * "ASCII::to_transliterate()" from voku/portable-ascii.
+  *
+  * $language specifies a priority for a specific language, which is useful
+  * if languages have different rules for the same character.
+  *
+  * @param string $string   <p>The input string.</p>
+  * @param string $language <p>Your primary language.</p>
+  * @param string $unknown  <p>Passed to "ASCII::to_transliterate()" for unknown characters (default is an empty string).</p>
+  *
+  * @return string
+  */
+ public static function downcode(
+     string $string,
+     string $language = 'en',
+     string $unknown = ''
+ ): string {
+     $expanded = self::expandString($string, $language);
+
+     // apply every registered custom map; inside a map the pairs are replaced in order
+     foreach (self::$maps as $customMap) {
+         $expanded = \str_replace(
+             \array_keys($customMap),
+             \array_values($customMap),
+             $expanded
+         );
+     }
+
+     $ascii = \voku\helper\ASCII::to_ascii($expanded, $language, false, true);
+
+     return \voku\helper\ASCII::to_transliterate($ascii, $unknown, false);
+ }
+
+ /**
+  * Convert a string to a URL slug.
+  *
+  * Thin wrapper around <strong>filter()</strong> with fixed defaults for typical blog-post
+  * slugs: no file-name mode, no word removal, lower-case output.
+  *
+  * @param string $string    <p>The text you want to convert.</p>
+  * @param int    $maxLength <p>Max. length of the output string, set to "0" (zero) to
+  *                          disable it</p>
+  * @param string $separator <p>Define a new separator for the words.</p>
+  * @param string $language  <p>The language you want to convert to.</p>
+  *
+  * @return string
+  */
+ public static function slug(
+     string $string,
+     int $maxLength = 200,
+     string $separator = '-',
+     string $language = 'en'
+ ): string {
+     // the fixed choices that distinguish slug() from a plain filter() call
+     $fileName = false;
+     $removeWords = false;
+     $strToLower = true;
+
+     return self::filter($string, $maxLength, $language, $fileName, $removeWords, $strToLower, $separator);
+ }
+
+ /**
+  * Convert a string into a URL-safe string.
+  *
+  * e.g.: "Petty<br>theft" to "petty-theft" (with the default $strToLower)
+  *
+  * @param string      $string      <p>The text you want to convert.</p>
+  * @param int         $maxLength   <p>Max. length of the output string, set to "0" (zero) to
+  *                                 disable it</p>
+  * @param string      $language    <p>The language you want to convert to, an empty string falls back to "en".</p>
+  * @param bool        $fileName    <p>
+  *                                 Keep the "." so that a file-extension such as ".jpg" survives.
+  *                                 </p>
+  * @param bool        $removeWords <p>
+  *                                 Remove the words of the language's remove-list from the string.<br />
+  *                                 Info: Set extra words via <strong>remove_words()</strong>.
+  *                                 </p>
+  * @param bool        $strToLower  <p>Use <strong>strtolower()</strong> at the end.</p>
+  * @param bool|string $separator   <p>
+  *                                 Separator for the words: false means "_", true or an empty
+  *                                 string means "-".
+  *                                 </p>
+  *
+  * @return string
+  */
+ public static function filter(
+     string $string,
+     int $maxLength = 200,
+     string $language = 'en',
+     bool $fileName = false,
+     bool $removeWords = false,
+     bool $strToLower = true,
+     $separator = '-'
+ ): string {
+     // nothing to do for an empty input
+     if ($string === '') {
+         return '';
+     }
+
+     // language fallback
+     if ($language === '') {
+         $language = 'en';
+     }
+
+     // separator fallback: false => "_", true / "" => "-"
+     if ($separator === false) {
+         $separator = '_';
+     } elseif ($separator === true || $separator === '') {
+         $separator = '-';
+     }
+
+     // the separator is embedded into regular expressions further down
+     $quotedSeparator = \preg_quote($separator, '/');
+
+     // load the default patterns when none are configured
+     if (self::$arrayToSeparator === []) {
+         self::reset_array_to_separator();
+     }
+
+     // drop apostrophes inside words (those are not quotes around a string);
+     // keep the input untouched when the regex fails
+     if (\strpos($string, "'") !== false) {
+         $string = \preg_replace("/(\w)'(\w)/u", '${1}${2}', $string) ?? $string;
+     }
+
+     // first pass: replace the configured patterns with the separator
+     $string = (string) \preg_replace(self::$arrayToSeparator, $separator, $string);
+
+     // strip the remaining html-tags
+     $hasAngleBracket = \strpos($string, '<') !== false || \strpos($string, '>') !== false;
+     if ($hasAngleBracket) {
+         $string = \strip_tags($string);
+     }
+
+     // language specific transliteration
+     $string = self::downcode($string, $language);
+
+     // second pass: the transliteration may have produced new matches
+     $string = (string) \preg_replace(self::$arrayToSeparator, $separator, $string);
+
+     // pattern for the words to remove; the empty pattern is a no-op
+     $removeWordsPattern = '//';
+     if ($removeWords === true) {
+         $wordsToRemove = self::get_remove_list($language);
+         if ($wordsToRemove !== []) {
+             $removeWordsPattern = '/\b(?:' . \implode('|', $wordsToRemove) . ')\b/ui';
+         }
+     }
+
+     // keep the "." from e.g. a file-extension?
+     $keptChars = $fileName ? '.' : '';
+
+     // character class that matches the separator
+     $separatorClass = '[' . ($quotedSeparator ?: ' ') . ']';
+
+     // the patterns run one after another, so their order matters
+     $patterns = [
+         // 1) remove un-needed chars
+         '/[^' . $quotedSeparator . $keptChars . '\-a-zA-Z0-9\s]/u',
+         // 2) convert spaces to $separator
+         '/[\s]+/u',
+         // 3) remove some extra words
+         $removeWordsPattern,
+         // 4) collapse repeated $separator's
+         '/' . $separatorClass . '+/u',
+         // 5) remove $separator at the end
+         '/' . $separatorClass . '+$/u',
+     ];
+     $replacements = ['', $separator, '', $separator, ''];
+     $string = (string) \preg_replace($patterns, $replacements, $string);
+
+     // cut the string only if a max. length is set and exceeded
+     if ($maxLength > 0 && \strlen($string) > $maxLength) {
+         $string = (string) \substr(\trim($string, $separator), 0, $maxLength);
+     }
+
+     // convert to lowercase
+     if ($strToLower === true) {
+         $string = \strtolower($string);
+     }
+
+     // no $separator at the beginning or the end of the result
+     return \trim($string, $separator);
+ }
+
+ /**
+  * Add words to the remove-list of a language. Accepts a single word or an array of words.
+  *
+  * Every word is regex-quoted before it is stored, because "URLify::filter()" joins
+  * the list into a regular expression.
+  *
+  * @param string|string[] $words
+  * @param string          $language
+  * @param bool            $merge    <p>
+  *                                  true: keep the current remove-list and add the words (duplicates are dropped),
+  *                                  false: replace the remove-list of this language.
+  *                                  </p>
+  *
+  * @return void
+  */
+ public static function remove_words($words, string $language = 'en', bool $merge = true)
+ {
+     // a single word is handled like a one-element list
+     if (!\is_array($words)) {
+         $words = [$words];
+     }
+
+     // quote each word for the "/" delimited pattern, keys stay as they are
+     $words = \array_map(
+         static function ($word) {
+             return \preg_quote($word, '/');
+         },
+         $words
+     );
+
+     if ($merge !== true) {
+         self::$remove_list[$language] = $words;
+
+         return;
+     }
+
+     $combined = \array_merge(self::get_remove_list($language), $words);
+     self::$remove_list[$language] = \array_unique($combined);
+ }
+
+ /**
+  * Restore the default patterns in "self::$arrayToSeparator".
+  *
+  * @return void
+  */
+ public static function reset_array_to_separator()
+ {
+     $defaultPatterns = [];
+
+     // html-entities: ", &, <, >, –, —
+     $defaultPatterns[] = '/&quot;|&amp;|&lt;|&gt;|&ndash;|&mdash;/i';
+
+     // dashes, underscore and quote-like characters
+     $defaultPatterns[] = '/⁻|-|—|_|"|`|´|\'/';
+
+     // line breaks and <br>-tags
+     // NOTE(review): the first alternative is a literal "/" followed by CRLF, the leading
+     // slash looks like a left-over; a plain CRLF is still matched by the "\r" and "\n" alternatives.
+     $defaultPatterns[] = "#/\r\n|\r|\n|<br.*/?>#isU";
+
+     self::$arrayToSeparator = $defaultPatterns;
+ }
+
+ /**
+  * Reset the remove-list of a language to the stop-words from "voku\helper\StopWords".
+  *
+  * The list is stored under the language key exactly as given (e.g. "de_AT"), while the
+  * stop-words are looked up via the normalized two-letter code (e.g. "de"). A language
+  * without stop-words gets an empty list.
+  *
+  * The stop-words are stored regex-quoted, like the words from "URLify::remove_words()",
+  * because "URLify::filter()" joins the list into a regular expression; an unquoted
+  * special character (e.g. "/" or "(") would otherwise break that expression.
+  *
+  * @param string $language
+  *
+  * @return void
+  */
+ public static function reset_remove_list(string $language = 'en')
+ {
+     if ($language === '') {
+         return;
+     }
+
+     $language_orig = $language;
+     $language = self::get_language_for_reset_remove_list($language);
+     if ($language === '') {
+         return;
+     }
+
+     $stopWords = new \voku\helper\StopWords();
+
+     try {
+         // assumes the library returns a flat array of words - TODO confirm
+         $stopWordList = $stopWords->getStopWordsFromLanguage($language);
+     } catch (\voku\helper\StopWordsLanguageNotExists $e) {
+         $stopWordList = [];
+     }
+
+     // quote for the "/" delimited pattern that "URLify::filter()" builds
+     self::$remove_list[$language_orig] = \array_map(
+         static function ($stopWord) {
+             return \preg_quote((string) $stopWord, '/');
+         },
+         $stopWordList
+     );
+ }
+
+ /**
+  * Alias of `URLify::downcode()`, called with the default value for `$unknown`.
+  *
+  * @param string $string   <p>The input string.</p>
+  * @param string $language <p>Your primary language.</p>
+  *
+  * @return string
+  */
+ public static function transliterate(string $string, string $language = 'en'): string
+ {
+     $transliterated = self::downcode($string, $language);
+
+     return $transliterated;
+ }
+
+ /**
+  * Expands the given string replacing some special parts for words.
+  * e.g. "lorem@ipsum.com" is replaced by "lorem at ipsum dot com".
+  *
+  * Currencies are expanded first, the other symbols afterwards.
+  *
+  * Most of these transformations have been inspired by the pelle/slugger
+  * project, distributed under the Eclipse Public License.
+  * Copyright 2012 Pelle Braendgaard
+  *
+  * @param string $string   The string to expand
+  * @param string $language
+  *
+  * @return string The result of expanding the string
+  */
+ protected static function expandString(string $string, string $language = 'en'): string
+ {
+     return self::expandSymbols(
+         self::expandCurrencies($string, $language),
+         $language
+     );
+ }
+
+ /**
+  * Normalize a language key for the stop-words lookup.
+  *
+  * Keys without "_" and "-" are only lower-cased; for the other keys (e.g. "de_AT",
+  * "pt-BR") everything from the first pair of letters to the end is reduced to
+  * that pair before lower-casing.
+  *
+  * @param string $language
+  *
+  * @return string
+  */
+ private static function get_language_for_reset_remove_list(string $language)
+ {
+     if ($language === '') {
+         return '';
+     }
+
+     $hasRegionPart = \strpos($language, '_') !== false || \strpos($language, '-') !== false;
+     if ($hasRegionPart) {
+         $language = (string) \preg_replace('/(?<first>[a-z]{2}).*/i', '$1', $language);
+     }
+
+     return \strtolower($language);
+ }
+
+ /**
+  * Expands the numeric currencies in euros, dollars, pounds
+  * and yens that the given string may include.
+  *
+  * An amount is only expanded when it is delimited by whitespace or by the start / end
+  * of the string. For "de" the German wording (Euro, Dollar, Cent, ...) is used, every
+  * other language gets the English wording.
+  *
+  * @param string $string
+  * @param string $language
+  *
+  * @return string
+  */
+ private static function expandCurrencies(string $string, string $language = 'en')
+ {
+     // cheap pre-check: skip the regex work when no currency symbol is present
+     if (
+         \strpos($string, '€') === false
+         &&
+         \strpos($string, '$') === false
+         &&
+         \strpos($string, '£') === false
+         &&
+         \strpos($string, '¥') === false
+     ) {
+         return $string;
+     }
+
+     // NOTE(review): "[.|,]" is a character class, so it also accepts a literal "|" as
+     // the decimal separator; kept as is to not change existing results.
+
+     if ($language === 'de') {
+         return (string) \preg_replace(
+             [
+                 // whole amounts
+                 '/(?:\s|^)(\d+)(?: )*€(?:\s|$)/',
+                 '/(?:\s|^)\$(?: )*(\d+)(?:\s|$)/',
+                 '/(?:\s|^)£(?: )*(\d+)(?:\s|$)/',
+                 '/(?:\s|^)¥(?: )*(\d+)(?:\s|$)/',
+                 // amounts with a fraction
+                 '/(?:\s|^)(\d+)[.|,](\d+)(?: )*€(?:\s|$)/',
+                 '/(?:\s|^)\$(?: )*(\d+)[.|,](\d+)(?:\s|$)/',
+                 '/(?:\s|^)£(?: )*(\d+)[.|,](\d+)(?:\s|$)/',
+             ],
+             [
+                 ' \1 Euro ',
+                 ' \1 Dollar ',
+                 ' \1 Pound ',
+                 ' \1 Yen ',
+                 ' \1 Euro \2 Cent ',
+                 ' \1 Dollar \2 Cent ',
+                 ' \1 Pound \2 Pence ',
+             ],
+             $string
+         );
+     }
+
+     return (string) \preg_replace(
+         [
+             // whole amounts, the singular form first
+             '/(?:\s|^)1(?: )*€(?:\s|$)/',
+             '/(?:\s|^)(\d+)(?: )*€(?:\s|$)/',
+             '/(?:\s|^)\$(?: )*1(?:\s|$)/',
+             '/(?:\s|^)\$(?: )*(\d+)(?:\s|$)/',
+             '/(?:\s|^)£(?: )*1(?:\s|$)/',
+             '/(?:\s|^)£(?: )*(\d+)(?:\s|$)/',
+             '/(?:\s|^)¥(?: )*(\d+)(?:\s|$)/',
+             // amounts with a fraction
+             '/(?:\s|^)1[.|,](\d+)(?: )*€(?:\s|$)/',
+             '/(?:\s|^)(\d+)[.|,](\d+)(?: )*€(?:\s|$)/',
+             // the "$" must be escaped here: unescaped it is the end-of-subject anchor, which
+             // turned a bare "1.50" at the end of the string into dollars and never matched "1.50$"
+             '/(?:\s|^)1[.|,](\d+)(?: )*\$(?:\s|$)/',
+             '/(?:\s|^)\$(?: )*(\d+)[.|,](\d+)(?:\s|$)/',
+             '/(?:\s|^)1[.|,](\d+)(?: )*£(?:\s|$)/',
+             '/(?:\s|^)£(?: )*(\d+)[.|,](\d+)(?:\s|$)/',
+         ],
+         [
+             ' 1 Euro ',
+             ' \1 Euros ',
+             ' 1 Dollar ',
+             ' \1 Dollars ',
+             ' 1 Pound ',
+             ' \1 Pounds ',
+             ' \1 Yen ',
+             ' 1 Euros \1 Cents ',
+             ' \1 Euros \2 Cents ',
+             ' 1 Dollars \1 Cents ',
+             ' \1 Dollars \2 Cents ',
+             ' 1 Pounds \1 Pence ',
+             ' \1 Pounds \2 Pence ',
+         ],
+         $string
+     );
+ }
+
+ /**
+  * Expands the special symbols "©", "®", "@", "&", "%" and "=" that the given string may include.
+  *
+  * "©", "®" and "@" use the "latin_symbols" replacements from "ASCII::charsArray()"; "&", "%"
+  * and "=" use the replacement of the given language and stay unchanged when the language
+  * has none. Whitespace around a symbol is consumed by the match.
+  *
+  * @param string $string
+  * @param string $language
+  *
+  * @return string
+  */
+ private static function expandSymbols(string $string, string $language = 'en')
+ {
+     // cheap pre-check: leave early when none of the symbols is present
+     $containsSymbol = false;
+     foreach (['©', '®', '@', '&', '%', '='] as $symbol) {
+         if (\strpos($string, $symbol) !== false) {
+             $containsSymbol = true;
+
+             break;
+         }
+     }
+     if (!$containsSymbol) {
+         return $string;
+     }
+
+     $maps = \voku\helper\ASCII::charsArray(true);
+
+     $patterns = [
+         '/\s*©\s*/',
+         '/\s*®\s*/',
+         '/\s*@\s*/',
+         '/\s*&\s*/',
+         '/\s*%\s*/',
+         '/(\s*=\s*)/',
+     ];
+
+     // same order as $patterns
+     $replacements = [
+         $maps['latin_symbols']['©'],
+         $maps['latin_symbols']['®'],
+         $maps['latin_symbols']['@'],
+         $maps[$language]['&'] ?? '&',
+         $maps[$language]['%'] ?? '%',
+         $maps[$language]['='] ?? '=',
+     ];
+
+     return (string) \preg_replace($patterns, $replacements, $string);
+ }
+
+ /**
+  * Return the "self::$remove_list[$language]" array; it is initialized via
+  * "URLify::reset_remove_list()" on first use.
+  *
+  * @param string $language
+  *
+  * @return array<mixed> an empty array for an empty language or an empty / missing list
+  */
+ private static function get_remove_list(string $language = 'en')
+ {
+     // no language, no list
+     if ($language === '') {
+         return [];
+     }
+
+     // lazy initialization
+     if (!isset(self::$remove_list[$language])) {
+         self::reset_remove_list($language);
+     }
+
+     // empty() also covers the case that the list is still missing
+     return empty(self::$remove_list[$language])
+         ? []
+         : self::$remove_list[$language];
+ }
+}