diff options
author | zotlabs <mike@macgirvin.com> | 2017-11-09 21:26:18 -0800 |
---|---|---|
committer | zotlabs <mike@macgirvin.com> | 2017-11-09 21:26:18 -0800 |
commit | 9b19e40a74a4a5d641015c04c1765d4bd4502bd1 (patch) | |
tree | 2352619895cfdad2609f9dfaefbe3ad657e9acd9 | |
parent | 766a7fb1b46a506e92d62ed0b76e0e0e3e6958a5 (diff) | |
download | volse-hubzilla-9b19e40a74a4a5d641015c04c1765d4bd4502bd1.tar.gz volse-hubzilla-9b19e40a74a4a5d641015c04c1765d4bd4502bd1.tar.bz2 volse-hubzilla-9b19e40a74a4a5d641015c04c1765d4bd4502bd1.zip |
hubzilla issue #901 - unicode characters in urls tripping up url regexes - these regexes have been modified to accept unicode "letters" which may preclude emojis and control sequences and symbols in url links; but should suffice for most legal URLs containing language context "text" glyphs.
-rw-r--r-- | include/activities.php | 2 | ||||
-rw-r--r-- | include/bbcode.php | 6 | ||||
-rw-r--r-- | include/markdown.php | 4 | ||||
-rw-r--r-- | include/text.php | 14 |
4 files changed, 13 insertions, 13 deletions
diff --git a/include/activities.php b/include/activities.php index 2671e668c..9b83f7a5c 100644 --- a/include/activities.php +++ b/include/activities.php @@ -50,7 +50,7 @@ function profile_activity($changed, $value) { if($t == 1 && strlen($value)) { // if it's a url, the HTML quotes will mess it up, so link it and don't try and zidify it because we don't know what it points to. - $value = preg_replace_callback("/([^\]\='".'"'."]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\+\,]+)/ism", 'red_zrl_callback', $value); + $value = preg_replace_callback("/([^\]\='".'"'."]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\+\,]+)/ismu", 'red_zrl_callback', $value); // take out the bookmark indicator if(substr($value,0,2) === '#^') $value = str_replace('#^','',$value); diff --git a/include/bbcode.php b/include/bbcode.php index 9a2a6eb9b..050ab2d29 100644 --- a/include/bbcode.php +++ b/include/bbcode.php @@ -838,13 +838,13 @@ function bbcode($Text, $preserve_nl = false, $tryoembed = true, $cache = false) // Perform URL Search - $urlchars = '[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]'; + $urlchars = '[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]'; if (strpos($Text,'http') !== false) { if($tryoembed) { - $Text = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ism", 'tryoembed', $Text); + $Text = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ismu", 'tryoembed', $Text); } - $Text = preg_replace("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ism", '$1<a href="$2" target="_blank" rel="nofollow noopener">$2</a>', $Text); + $Text = preg_replace("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ismu", '$1<a href="$2" target="_blank" rel="nofollow noopener">$2</a>', $Text); } if (strpos($Text,'[/share]') !== false) { diff --git a/include/markdown.php b/include/markdown.php index 865727b20..f398d279e 100644 --- a/include/markdown.php +++ b/include/markdown.php @@ -75,10 +75,10 @@ function markdown_to_bb($s, $use_zrl = false, $options = []) { // Convert everything that looks like a link to a link if($use_zrl) { $s = str_replace(['[img', '/img]'], ['[zmg', '/zmg]'], $s); - $s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ism", '$1[zrl=$2$3]$2$3[/zrl]',$s); + $s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ismu", '$1[zrl=$2$3]$2$3[/zrl]',$s); } else { - $s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ism", '$1[url=$2$3]$2$3[/url]',$s); + $s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ismu", '$1[url=$2$3]$2$3[/url]',$s); } // remove duplicate adjacent code tags diff --git a/include/text.php b/include/text.php index 746c35679..c74e515d2 100644 --- a/include/text.php +++ b/include/text.php @@ -819,7 +819,7 @@ function get_tags($s) { // added ; to single word tags to allow emojis and other unicode character constructs in bbcode // (this would actually be &#xnnnnn; but the ampersand will have been escaped to & by the time we see it.) - if(preg_match_all('/(?<![a-zA-Z0-9=\/\?])(@[^ \x0D\x0A,:?\[]+ [^ \x0D\x0A@,:?\[]+)/',$s,$match)) { + if(preg_match_all('/(?<![a-zA-Z0-9=\pL\/\?])(@[^ \x0D\x0A,:?\[]+ [^ \x0D\x0A@,:?\[]+)/u',$s,$match)) { foreach($match[1] as $mtch) { if(substr($mtch,-1,1) === '.') $ret[] = substr($mtch,0,-1); @@ -831,7 +831,7 @@ function get_tags($s) { // Otherwise pull out single word tags. These can be @nickname, @first_last // and #hash tags. - if(preg_match_all('/(?<![a-zA-Z0-9=\/\?\;])([@#\!][^ \x0D\x0A,;:?\[]+)/',$s,$match)) { + if(preg_match_all('/(?<![a-zA-Z0-9=\pL\/\?\;])([@#\!][^ \x0D\x0A,;:?\[]+)/u',$s,$match)) { foreach($match[1] as $mtch) { if(substr($mtch,-1,1) === '.') $mtch = substr($mtch,0,-1); @@ -1052,7 +1052,7 @@ function searchbox($s,$id='search-box',$url='/search',$save = false) { * @return string */ function linkify($s, $me = false) { - $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/", (($me) ? ' <a href="$1" rel="me" >$1</a>' : ' <a href="$1" >$1</a>'), $s); + $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/u", (($me) ? ' <a href="$1" rel="me" >$1</a>' : ' <a href="$1" >$1</a>'), $s); $s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism",'<$1$2=$3&$4>',$s); return($s); @@ -3099,10 +3099,10 @@ function cleanup_bbcode($body) { $body = preg_replace_callback('/\[zrl(.*?)\[\/(zrl)\]/ism','\red_escape_codeblock',$body); - $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ -+\,\(\)]+)/ism", '\nakedoembed', $body); - $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ -+\,\(\)]+)/ism", '\red_zrl_callback', $body); + $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ ++\,\(\)]+)/ismu", '\nakedoembed', $body); + $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ ++\,\(\)]+)/ismu", '\red_zrl_callback', $body); $body = preg_replace_callback('/\[\$b64zrl(.*?)\[\/(zrl)\]/ism','\red_unescape_codeblock',$body); $body = preg_replace_callback('/\[\$b64url(.*?)\[\/(url)\]/ism','\red_unescape_codeblock',$body); |