Merge branch 'fix-cleanup-bbcode-url-regex' into 'dev'

Fix naked URLs immediately followed by a newline. See merge request hubzilla/core!2150
author: Mario <mario@mariovavti.com> 2024-10-12 17:05:19 +0000
committer: Mario <mario@mariovavti.com> 2024-10-12 17:05:19 +0000
commit: 40a9989be2366f30a37c8d451bae74a1d304319b (patch)
tree: 246ebe0acd3cc6a2f237add5d2d7e7ade8c6f9e2
parent: 05a5b644bf043aee941d17ac3fb254c99b1c64d1 (diff)
parent: d1648927b52b537314b470fdb9b5001ed6809c4f (diff)
download: volse-hubzilla-40a9989be2366f30a37c8d451bae74a1d304319b.tar.gz
volse-hubzilla-40a9989be2366f30a37c8d451bae74a1d304319b.tar.bz2
volse-hubzilla-40a9989be2366f30a37c8d451bae74a1d304319b.zip
4 files changed, 47 insertions, 29 deletions
diff --git a/include/text.php b/include/text.php
index 137622b7d..e69ce7d10 100644
--- a/include/text.php
+++ b/include/text.php
@@ -3755,12 +3755,9 @@ function cleanup_bbcode($body) {
 	$body = preg_replace_callback('/\[img(.*?)\[\/(img)\]/ism','\red_escape_codeblock',$body);
 	$body = preg_replace_callback('/\[zmg(.*?)\[\/(zmg)\]/ism','\red_escape_codeblock',$body);
 
-	$body = preg_replace_callback("/([^\]\='".'"'."\;\/\{]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
-+\,\(\)]+)/ismu", '\nakedoembed', $body);
-
-	$body = preg_replace_callback("/([^\]\='".'"'."\;\/\{]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
-+\,\(\)]+)/ismu", '\red_zrl_callback', $body);
+	$body = preg_replace_callback("/([^\]\='".'"'."\;\/\{]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\+\,\(\)]+)/ismu", '\nakedoembed', $body);
 
+	$body = preg_replace_callback("/([^\]\='".'"'."\;\/\{]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\+\,\(\)]+)/ismu", '\red_zrl_callback', $body);
 
 	$body = preg_replace_callback('/\[\$b64code(.*?)\[\/(code)\]/ism','\red_unescape_codeblock',$body);
 	$body = preg_replace_callback('/\[\$b64summary(.*?)\[\/(summary)\]/ism','\red_unescape_codeblock',$body);
diff --git a/include/zid.php b/include/zid.php
index 159a3b834..2b5d53916 100644
--- a/include/zid.php
+++ b/include/zid.php
@@ -261,25 +261,25 @@ function zidify_text($s) {
  */
 function red_zrl_callback($matches) {
 
-    // Catch and exclude trailing punctuation
-    preg_match("/[.,;:!?)]*$/i", $matches[2], $pts);
-    $matches[2] = substr($matches[2], 0, strlen($matches[2])-strlen($pts[0]));
+	// Catch and exclude trailing punctuation
+	preg_match("/[.,;:!?)]*$/i", $matches[2], $pts);
+	$matches[2] = substr($matches[2], 0, strlen($matches[2])-strlen($pts[0]));
 
-    $zrl = is_matrix_url($matches[2]);
+	$zrl = is_matrix_url($matches[2]);
 
-    $t = strip_zids($matches[2]);
-    if($t !== $matches[2]) {
-        $zrl = true;
-        $matches[2] = $t;
-    }
+	$t = strip_zids($matches[2]);
+	if($t !== $matches[2]) {
+		$zrl = true;
+		$matches[2] = $t;
+	}
 
-    if($matches[1] === '#^')
-        $matches[1] = '';
+	if($matches[1] === '#^')
+		$matches[1] = '';
 
-    if($zrl)
-        return $matches[1] . '#^[zrl=' . $matches[2] . ']' . $matches[2] . '[/zrl]' . $pts[0];
+	if($zrl)
+		return $matches[1] . '#^[zrl=' . $matches[2] . ']' . $matches[2] . '[/zrl]' . $pts[0];
 
-    return $matches[1] . '#^[url=' . $matches[2] . ']' . $matches[2] . '[/url]' . $pts[0];
+	return $matches[1] . '#^[url=' . $matches[2] . ']' . $matches[2] . '[/url]' . $pts[0];
 }
 
 /**
diff --git a/tests/unit/includes/BBCodeTest.php b/tests/unit/includes/BBCodeTest.php
index c6a60f35b..136fc6e0e 100644
--- a/tests/unit/includes/BBCodeTest.php
+++ b/tests/unit/includes/BBCodeTest.php
@@ -143,6 +143,14 @@ class BBCodeTest extends UnitTestCase {
 				'example url: https://example.com',
 				'example url: <a href="https://example.com"  target="_blank"  rel="nofollow noopener">https://example.com</a>'
 			],
+			'naked url followed by newline' => [
+				"https://www.example.com\nhave a great day.",
+				'<a href="https://www.example.com"  target="_blank"  rel="nofollow noopener">https://www.example.com</a><br />have a great day.',
+			],
+			'inline naked url' => [
+				"This is a link https://example.com/some/path more info.",
+				'This is a link <a href="https://example.com/some/path"  target="_blank"  rel="nofollow noopener">https://example.com/some/path</a> more info.',
+			],
 			'naked url within code block is not converted to link' => [
 				"[code]\nhttp://example.com\n[/code]",
 				"<pre><code>http://example.com</code></pre>"
diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php
index 217d12ca2..55dbb4445 100644
--- a/tests/unit/includes/MarkdownTest.php
+++ b/tests/unit/includes/MarkdownTest.php
@@ -36,7 +36,7 @@ class MarkdownTest extends UnitTestCase {
 	 * @dataProvider markdown_to_bbcode_provider
 	 */
 	public function test_markdown_to_bbcode(string $expected, string $src): void {
-		$this->assertEquals($expected, markdown_to_bb($src));
+		$this->assertEquals($expected, markdown_to_bb($src, true, ['preserve_lf' => true]));
 	}
 
 	public static function markdown_to_bbcode_provider(): array {
@@ -54,11 +54,14 @@ class MarkdownTest extends UnitTestCase {
 				'This is a test of **bold text**, *italic text* and ***bold and italic text***'
 			],
 			'multiline text' => [
-				'This text is text wrapped over multiple lines.',
+				// This is not as expected in markdown, but may be needed
+				// for compatibility with bbcode behaviour.
+				"This text is\ntext wrapped\nover multiple\nlines.",
 				"This text is\ntext wrapped\nover multiple\nlines."
 			],
 			'text with hard linebreak' => [
-				"Line one\nLine two",
+				// An extra line break is inserted here...
+				"Line one\n\nLine two",
 				"Line one  \nLine two"
 			],
 			'paragraphs' => [
@@ -78,29 +81,39 @@ class MarkdownTest extends UnitTestCase {
 				'`some code`'
 			],
 			'inline code with wrapped text' => [
-				'[code]some code unwrapped[/code]',
+				// Not sure if the newline should be preserved here?
+				"[code]some code\nunwrapped[/code]",
 				"`some code\n   unwrapped`"
 			],
 			'code block no language' => [
-				"[code]some code\nover multiple lines[/code]",
+				"[code]some code\nover multiple lines\n[/code]",
 				"```\nsome code\nover multiple lines\n```"
 			],
 			'code block no language indented' => [
-				"[code]some code\n    over multiple lines\n    with indentation[/code]",
+				// For some reason one space char is eaten on indented lines.
+				"[code]some code\n   over multiple lines\n   with indentation\n[/code]",
 				"```\nsome code\n    over multiple lines\n    with indentation\n```"
 			],
 			'code block with language' => [
-				"[code=php]&lt;?php\necho phpinfo();[/code]",
+				"[code=php]&lt;?php\necho phpinfo();\n[/code]",
 				"```php\n<?php\necho phpinfo();\n```"
 			],
 			'code block with URL' => [
-				"[code]an example url https://example.com[/code]",
+				"[code]an example url https://example.com\n[/code]",
 				"```\nan example url https://example.com\n```"
 			],
 			'bbcode code block with URL' => [
-				"[code] proxy_pass http://example.com; [/code]",
+				"[code]\nproxy_pass http://example.com;\n[/code]",
 				"[code]\nproxy_pass http://example.com;\n[/code]"
-			]
+			],
+			'naked url followed by newline' => [
+				"https://example.com\nhave a great day.",
+				"https://example.com\nhave a great day.",
+			],
+			'inline naked url' => [
+				'This is a link https://example.com/some/path more info.',
+				'This is a link https://example.com/some/path more info.',
+			],
 		];
 	}
author	Mario <mario@mariovavti.com>	2024-10-12 17:05:19 +0000
committer	Mario <mario@mariovavti.com>	2024-10-12 17:05:19 +0000
commit	40a9989be2366f30a37c8d451bae74a1d304319b (patch)
tree	246ebe0acd3cc6a2f237add5d2d7e7ade8c6f9e2
parent	05a5b644bf043aee941d17ac3fb254c99b1c64d1 (diff)
parent	d1648927b52b537314b470fdb9b5001ed6809c4f (diff)
download	volse-hubzilla-40a9989be2366f30a37c8d451bae74a1d304319b.tar.gz volse-hubzilla-40a9989be2366f30a37c8d451bae74a1d304319b.tar.bz2 volse-hubzilla-40a9989be2366f30a37c8d451bae74a1d304319b.zip