aboutsummaryrefslogtreecommitdiffstats
path: root/include/text.php
diff options
context:
space:
mode:
authorzotlabs <mike@macgirvin.com>2017-11-09 21:26:18 -0800
committerzotlabs <mike@macgirvin.com>2017-11-09 21:26:18 -0800
commit9b19e40a74a4a5d641015c04c1765d4bd4502bd1 (patch)
tree2352619895cfdad2609f9dfaefbe3ad657e9acd9 /include/text.php
parent766a7fb1b46a506e92d62ed0b76e0e0e3e6958a5 (diff)
downloadvolse-hubzilla-9b19e40a74a4a5d641015c04c1765d4bd4502bd1.tar.gz
volse-hubzilla-9b19e40a74a4a5d641015c04c1765d4bd4502bd1.tar.bz2
volse-hubzilla-9b19e40a74a4a5d641015c04c1765d4bd4502bd1.zip
hubzilla issue #901 - unicode characters in urls tripping up url regexes - these regexes have been modified to accept unicode "letters" which may preclude emojis and control sequences and symbols in url links; but should suffice for most legal URLs containing language context "text" glyphs.
Diffstat (limited to 'include/text.php')
-rw-r--r--include/text.php14
1 files changed, 7 insertions, 7 deletions
diff --git a/include/text.php b/include/text.php
index 746c35679..c74e515d2 100644
--- a/include/text.php
+++ b/include/text.php
@@ -819,7 +819,7 @@ function get_tags($s) {
// added ; to single word tags to allow emojis and other unicode character constructs in bbcode
// (this would actually be &#xnnnnn; but the ampersand will have been escaped to &amp; by the time we see it.)
- if(preg_match_all('/(?<![a-zA-Z0-9=\/\?])(@[^ \x0D\x0A,:?\[]+ [^ \x0D\x0A@,:?\[]+)/',$s,$match)) {
+ if(preg_match_all('/(?<![a-zA-Z0-9=\pL\/\?])(@[^ \x0D\x0A,:?\[]+ [^ \x0D\x0A@,:?\[]+)/u',$s,$match)) {
foreach($match[1] as $mtch) {
if(substr($mtch,-1,1) === '.')
$ret[] = substr($mtch,0,-1);
@@ -831,7 +831,7 @@ function get_tags($s) {
// Otherwise pull out single word tags. These can be @nickname, @first_last
// and #hash tags.
- if(preg_match_all('/(?<![a-zA-Z0-9=\/\?\;])([@#\!][^ \x0D\x0A,;:?\[]+)/',$s,$match)) {
+ if(preg_match_all('/(?<![a-zA-Z0-9=\pL\/\?\;])([@#\!][^ \x0D\x0A,;:?\[]+)/u',$s,$match)) {
foreach($match[1] as $mtch) {
if(substr($mtch,-1,1) === '.')
$mtch = substr($mtch,0,-1);
@@ -1052,7 +1052,7 @@ function searchbox($s,$id='search-box',$url='/search',$save = false) {
* @return string
*/
function linkify($s, $me = false) {
- $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/", (($me) ? ' <a href="$1" rel="me" >$1</a>' : ' <a href="$1" >$1</a>'), $s);
+ $s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/u", (($me) ? ' <a href="$1" rel="me" >$1</a>' : ' <a href="$1" >$1</a>'), $s);
$s = preg_replace("/\<(.*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism",'<$1$2=$3&$4>',$s);
return($s);
@@ -3099,10 +3099,10 @@ function cleanup_bbcode($body) {
$body = preg_replace_callback('/\[zrl(.*?)\[\/(zrl)\]/ism','\red_escape_codeblock',$body);
- $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
-+\,\(\)]+)/ism", '\nakedoembed', $body);
- $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
-+\,\(\)]+)/ism", '\red_zrl_callback', $body);
+ $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
++\,\(\)]+)/ismu", '\nakedoembed', $body);
+ $body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
++\,\(\)]+)/ismu", '\red_zrl_callback', $body);
$body = preg_replace_callback('/\[\$b64zrl(.*?)\[\/(zrl)\]/ism','\red_unescape_codeblock',$body);
$body = preg_replace_callback('/\[\$b64url(.*?)\[\/(url)\]/ism','\red_unescape_codeblock',$body);