From f7e3d0dbf6a763557d3431c6d12a6dde1be5648b Mon Sep 17 00:00:00 2001 From: zotlabs Date: Thu, 9 Nov 2017 21:26:18 -0800 Subject: hubzilla issue #901 - unicode characters in urls tripping up url regexes - these regexes have been modified to accept unicode "letters" which may preclude emojis and control sequences and symbols in url links; but should suffice for most legal URLs containing language context "text" glyphs. --- include/text.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/text.php') diff --git a/include/text.php b/include/text.php index 746c35679..c74e515d2 100644 --- a/include/text.php +++ b/include/text.php @@ -819,7 +819,7 @@ function get_tags($s) { // added ; to single word tags to allow emojis and other unicode character constructs in bbcode // (this would actually be &#xnnnnn; but the ampersand will have been escaped to & by the time we see it.) - if(preg_match_all('/(?$1' : ' $1'), $s); + $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/u", (($me) ? ' $1' : ' $1'), $s); $s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism",'<$1$2=$3&$4>',$s); return($s); @@ -3099,10 +3099,10 @@ function cleanup_bbcode($body) { $body = preg_replace_callback('/\[zrl(.*?)\[\/(zrl)\]/ism','\red_escape_codeblock',$body); - $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ -+\,\(\)]+)/ism", '\nakedoembed', $body); - $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ -+\,\(\)]+)/ism", '\red_zrl_callback', $body); + $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ ++\,\(\)]+)/ismu", '\nakedoembed', $body); + $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\ ++\,\(\)]+)/ismu", '\red_zrl_callback', $body); $body = preg_replace_callback('/\[\$b64zrl(.*?)\[\/(zrl)\]/ism','\red_unescape_codeblock',$body); $body = preg_replace_callback('/\[\$b64url(.*?)\[\/(url)\]/ism','\red_unescape_codeblock',$body); -- cgit v1.2.3