From 1881029040245bb5b097ccdc3da64ef7581e9169 Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Tue, 6 Feb 2024 11:41:37 +0100 Subject: Clean up markdowntest and fix bb to markdown test. --- tests/unit/includes/MarkdownTest.php | 66 ++++++++++++++---------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index 953305074..25c947824 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -1,30 +1,29 @@ assertEquals($markdown, html2markdown($html)); } - public function html2markdownProvider() { + public function html2markdownProvider(): array { return [ 'empty text' => [ '', @@ -125,23 +122,10 @@ class MarkdownTest extends UnitTestCase { ]; } - /*public function testHtml2markdownException() { - //$this->expectException(\InvalidArgumentException::class); - // need to stub logger() for this to work - $this->assertEquals('', html2markdown('<getFunctionMock(__NAMESPACE__, "bbcode"); - $bbc->expects($this->once())->willReturn('testbold
i
'); + public function test_bb_to_markdown(): void { + $input = "test[b]bold[/b]\n[i]i[/i][ul][li]li1[/li][li]li2[/li][/ul]\n"; + $expected = "test**bold** \n*i*\n\n- li1\n- li2"; - $this->assertEquals($bb1, bb2diaspora($html1)); + $this->assertEquals($expected, bb_to_markdown($input)); } -*/ } -- cgit v1.2.3 From eb6a143fffba5a796945a425b289e95f7bd28d00 Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Tue, 6 Feb 2024 16:19:30 +0100 Subject: Add some tests for markdown to bbcode conversion. --- tests/unit/includes/MarkdownTest.php | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index 25c947824..71f13cdb0 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -31,6 +31,43 @@ require_once 'include/markdown.php'; * @brief Unit Test case for markdown functions. */ class MarkdownTest extends UnitTestCase { + + /** + * @dataProvider markdown_to_bbcode_provider + */ + public function test_markdown_to_bbcode(string $expected, string $src): void { + $this->assertEquals($expected, markdown_to_bb($src)); + } + + private function markdown_to_bbcode_provider(): array { + return [ + 'empty text' => [ + '', + '' + ], + 'plain text' => [ + 'This is a test', + 'This is a test' + ], + 'bold and italic' => [ + 'This is a test of [b]bold text[/b], [i]italic text[/i] and [b][i]bold and italic text[/i][/b]', + 'This is a test of **bold text**, *italic text* and ***bold and italic text***' + ], + 'multiline text' => [ + 'This text is text wrapped over multiple lines.', + "This text is\ntext wrapped\nover multiple\nlines." 
+ ], + 'paragraphs' => [ + "Paragraph one\n\nParagraph two", + "Paragraph one\n\nParagraph two", + ], + 'inline image' => [ + '[img=https://example.com/image.jpg]https://example.com/image.jpg[/img]', + '![](https://example.com/image.jpg)' + ], + ]; + } + /** * @covers ::html2markdown * @dataProvider html2markdownProvider -- cgit v1.2.3 From 983f063d33efbf2c6fb43c3aa12562b464b2cb9c Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Tue, 6 Feb 2024 16:23:39 +0100 Subject: Fix image with alt text for html/md to bbcode --- include/html2bbcode.php | 1 + tests/unit/includes/BBCodeTest.php | 46 ++++++++++++++++++++++++++++++++++++ tests/unit/includes/MarkdownTest.php | 4 ++++ 3 files changed, 51 insertions(+) create mode 100644 tests/unit/includes/BBCodeTest.php diff --git a/include/html2bbcode.php b/include/html2bbcode.php index aca3ff4f8..5cb153a77 100644 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -219,6 +219,7 @@ function html2bbcode($message) node2bbcode($doc, 'a', array('href'=>'/(.+)/'), '[url=$1]', '[/url]'); node2bbcode($doc, 'img', array('src'=>'/(.+)/', 'width'=>'/(\d+)/', 'height'=>'/(\d+)/'), '[img=$2x$3]$1', '[/img]'); + node2bbcode($doc, 'img', array('src'=>'/(.+)/', 'alt'=>'/(.+)/'), '[img=$1]$2', '[/img]'); node2bbcode($doc, 'img', array('src'=>'/(.+)/'), '[img]$1', '[/img]'); diff --git a/tests/unit/includes/BBCodeTest.php b/tests/unit/includes/BBCodeTest.php new file mode 100644 index 000000000..9e9b1a33b --- /dev/null +++ b/tests/unit/includes/BBCodeTest.php @@ -0,0 +1,46 @@ +assertEquals($expected, html2bbcode($src)); + } + + private function html2bbcode_provider(): array { + return [ + 'image with alt text' => [ + 'Alt text', + '[img=https://example.com/image.jpg]Alt text[/img]' + ], + ]; + } +} diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index 71f13cdb0..e05cec6fb 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -65,6 +65,10 @@ class 
MarkdownTest extends UnitTestCase { '[img=https://example.com/image.jpg]https://example.com/image.jpg[/img]', '![](https://example.com/image.jpg)' ], + 'inline image with alt text' => [ + '[img=https://example.com/image.jpg]Alt text[/img]', + '![Alt text](https://example.com/image.jpg)' + ], ]; } -- cgit v1.2.3 From ec19ee9d82a9d06e5b86fcb58329767226b0676f Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Tue, 6 Feb 2024 18:39:51 +0100 Subject: Fix convert code blocs from markdown/html to bbcode --- include/html2bbcode.php | 11 +++++++++-- tests/unit/includes/BBCodeTest.php | 4 ++++ tests/unit/includes/MarkdownTest.php | 8 ++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/html2bbcode.php b/include/html2bbcode.php index 5cb153a77..f75a3e428 100644 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -127,8 +127,15 @@ function html2bbcode($message) $xpath = new DomXPath($doc); $list = $xpath->query("//pre"); - foreach ($list as $node) - $node->nodeValue = str_replace("\n", "\r", $node->nodeValue); + foreach ($list as $node) { + if ($node->hasChildNodes()) { + foreach ($node->childNodes as $child) { + $child->nodeValue = str_replace("\n", "\r", $child->nodeValue); + } + } else { + $node->nodeValue = str_replace("\n", "\r", $node->nodeValue); + } + } $message = $doc->saveHTML(); $message = str_replace(array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"), array("<", ">", "
", " ", ""), $message); diff --git a/tests/unit/includes/BBCodeTest.php b/tests/unit/includes/BBCodeTest.php index 9e9b1a33b..54c35e67d 100644 --- a/tests/unit/includes/BBCodeTest.php +++ b/tests/unit/includes/BBCodeTest.php @@ -41,6 +41,10 @@ class BBCodeTest extends UnitTestCase { 'Alt text', '[img=https://example.com/image.jpg]Alt text[/img]' ], + 'code block' => [ + "
some\ncode
", + "[code]some\ncode[/code]" + ], ]; } } diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index e05cec6fb..8025e8efa 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -69,6 +69,14 @@ class MarkdownTest extends UnitTestCase { '[img=https://example.com/image.jpg]Alt text[/img]', '![Alt text](https://example.com/image.jpg)' ], + 'inline code' => [ + '[code]some code[/code]', + '`some code`' + ], + 'code block no language' => [ + "[code]some code\nover multiple lines[/code]", + "```\nsome code\nover multiple lines\n```" + ], ]; } -- cgit v1.2.3 From e6ce2885c0b4586a270e0ace79598a92365df56f Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Tue, 6 Feb 2024 21:23:51 +0100 Subject: Fix: Keep indentation in html and md code blocks. Moves the logic for unwrapping broken lines in html (and Markdown) to the node processing, instead of doing it over the full html content. This allows us to skip it for code blocks (aka `<code>` elements within `<pre>` elements).
---
 include/html2bbcode.php              | 33 ++++++++++++++++-----------------
 tests/unit/includes/BBCodeTest.php   |  8 ++++++++
 tests/unit/includes/MarkdownTest.php |  4 ++++
 3 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/include/html2bbcode.php b/include/html2bbcode.php
index f75a3e428..8822d6f1d 100644
--- a/include/html2bbcode.php
+++ b/include/html2bbcode.php
@@ -65,6 +65,22 @@ function node2bbcodesub(&$doc, $oldnode, $attributes, $startbb, $endbb)
 			if ($oldNode->hasChildNodes()) {
 				foreach ($oldNode->childNodes as $child) {
 					$newNode = $child->cloneNode(true);
+
+					// Newlines are insignificant in HTML, but not so in BBCode, so let's
+					// unwrap the child nodes when converting them. Also we compress
+					// consecutive whitespace chars to one.
+					//
+					// The exception is `<pre>` and `<code>` elements which
+					// should keep both newlines and whitespace intact.
+					if ($oldNode->nodeName != 'pre' && $oldNode->nodeName != 'code') {
+						$newNode->nodeValue = str_replace(
+							array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"),
+							array("<", ">", "
", " ", ""), + $newNode->nodeValue); + + $newNode->nodeValue = preg_replace('=[\s]{2,}=i', " ", $newNode->nodeValue); + } + $oldNode->parentNode->insertBefore($newNode, $oldNode); } } @@ -125,23 +141,6 @@ function html2bbcode($message) deletenode($doc, 'xml'); deletenode($doc, 'removeme'); - $xpath = new DomXPath($doc); - $list = $xpath->query("//pre"); - foreach ($list as $node) { - if ($node->hasChildNodes()) { - foreach ($node->childNodes as $child) { - $child->nodeValue = str_replace("\n", "\r", $child->nodeValue); - } - } else { - $node->nodeValue = str_replace("\n", "\r", $node->nodeValue); - } - } - - $message = $doc->saveHTML(); - $message = str_replace(array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"), array("<", ">", "
", " ", ""), $message); - $message = preg_replace('= [\s]*=i', " ", $message); - @$doc->loadHTML($message); - node2bbcode($doc, 'html', array(), "", ""); node2bbcode($doc, 'body', array(), "", ""); diff --git a/tests/unit/includes/BBCodeTest.php b/tests/unit/includes/BBCodeTest.php index 54c35e67d..035bcbdc7 100644 --- a/tests/unit/includes/BBCodeTest.php +++ b/tests/unit/includes/BBCodeTest.php @@ -37,6 +37,10 @@ class BBCodeTest extends UnitTestCase { private function html2bbcode_provider(): array { return [ + 'paragraph over multiple lines' => [ + "

A paragraph over\nmultiple lines\nshould be unwrapped

", + 'A paragraph over multiple lines should be unwrapped' + ], 'image with alt text' => [ 'Alt text', '[img=https://example.com/image.jpg]Alt text[/img]' @@ -45,6 +49,10 @@ class BBCodeTest extends UnitTestCase { "
some\ncode
", "[code]some\ncode[/code]" ], + 'code block with indentation' => [ + "
some\n    indented\ncode
", + "[code]some\n indented\ncode[/code]" + ], ]; } } diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index 8025e8efa..c51fbfe16 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -77,6 +77,10 @@ class MarkdownTest extends UnitTestCase { "[code]some code\nover multiple lines[/code]", "```\nsome code\nover multiple lines\n```" ], + 'code block no language indented' => [ + "[code]some code\n over multiple lines\n with indentation[/code]", + "```\nsome code\n over multiple lines\n with indentation\n```" + ], ]; } -- cgit v1.2.3 From 4f69e02768ac79f81f1a7ef3e7891d2e0f8f06fe Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Wed, 7 Feb 2024 16:02:53 +0100 Subject: Don't concert html nodes with no bbcode equivalent. --- include/html2bbcode.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/html2bbcode.php b/include/html2bbcode.php index 8822d6f1d..cbfbe10ba 100644 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -10,6 +10,10 @@ Originally made for the syncom project: http://wiki.piratenpartei.de/Syncom function node2bbcode(&$doc, $oldnode, $attributes, $startbb, $endbb) { do { + if (empty($startbb) && empty($endbb)) { + break; + } + $done = node2bbcodesub($doc, $oldnode, $attributes, $startbb, $endbb); } while ($done); } -- cgit v1.2.3 From 86e953f49566a0f4bb2220a8e5f4013044f4a49f Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Wed, 7 Feb 2024 22:34:13 +0100 Subject: Fix: Preserve hard linebreaks from markdown and html --- include/html2bbcode.php | 6 +++++- tests/unit/includes/MarkdownTest.php | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/html2bbcode.php b/include/html2bbcode.php index cbfbe10ba..71ea68dae 100644 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -192,7 +192,8 @@ function html2bbcode($message) node2bbcode($doc, 'blockquote', array(), '[quote]', '[/quote]'); - node2bbcode($doc, 'br', 
array(), "\n", ''); + // Use a temporary tag to keep line breaks + node2bbcode($doc, 'br', array(), '[br]', ''); node2bbcode($doc, 'p', array('class'=>'MsoNormal'), "\n", ""); node2bbcode($doc, 'div', array('class'=>'MsoNormal'), "\r", ""); @@ -302,6 +303,9 @@ function html2bbcode($message) $message = str_replace(array('[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'), array('[b]', '[/b]', '[i]', '[/i]'), $message); + // Restore linebreaks from temp tag + $message = str_replace('[br] ', "\n", $message); + // Handling Yahoo style of mails // $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index c51fbfe16..87cfc2899 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -57,6 +57,10 @@ class MarkdownTest extends UnitTestCase { 'This text is text wrapped over multiple lines.', "This text is\ntext wrapped\nover multiple\nlines." ], + 'text with hard linebreak' => [ + "Line one\nLine two", + "Line one \nLine two" + ], 'paragraphs' => [ "Paragraph one\n\nParagraph two", "Paragraph one\n\nParagraph two", @@ -73,6 +77,10 @@ class MarkdownTest extends UnitTestCase { '[code]some code[/code]', '`some code`' ], + 'inline code with wrapped text' => [ + '[code]some code unwrapped[/code]', + "`some code\n unwrapped`" + ], 'code block no language' => [ "[code]some code\nover multiple lines[/code]", "```\nsome code\nover multiple lines\n```" -- cgit v1.2.3 From 19ae8cfdfc27578a5f4660993702320ad1e5fac0 Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Thu, 8 Feb 2024 10:00:12 +0100 Subject: Support code blocks with language in markdown and html. 
--- include/html2bbcode.php | 1 + tests/unit/includes/MarkdownTest.php | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/html2bbcode.php b/include/html2bbcode.php index 71ea68dae..e2fa94326 100644 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -238,6 +238,7 @@ function html2bbcode($message) node2bbcode($doc, 'audio', array('src'=>'/(.+)/'), '[audio]$1', '[/audio]'); // node2bbcode($doc, 'iframe', array('src'=>'/(.+)/'), '[iframe]$1', '[/iframe]'); + node2bbcode($doc, 'code', array('class'=>'/(.+)/'), '[code=$1]', '[/code]'); node2bbcode($doc, 'code', array(), '[code]', '[/code]'); $message = $doc->saveHTML(); diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index 87cfc2899..960c15139 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -89,6 +89,10 @@ class MarkdownTest extends UnitTestCase { "[code]some code\n over multiple lines\n with indentation[/code]", "```\nsome code\n over multiple lines\n with indentation\n```" ], + 'code block with language' => [ + "[code=php]<?php\necho phpinfo();[/code]", + "```php\n