diff options
author | Harald Eilertsen <haraldei@anduin.net> | 2024-02-06 21:23:51 +0100 |
---|---|---|
committer | Harald Eilertsen <haraldei@anduin.net> | 2024-02-07 15:54:40 +0100 |
commit | e6ce2885c0b4586a270e0ace79598a92365df56f (patch) | |
tree | 6bb17f31c8334c34f573993c1d0e1291cc670849 | |
parent | ec19ee9d82a9d06e5b86fcb58329767226b0676f (diff) | |
download | volse-hubzilla-e6ce2885c0b4586a270e0ace79598a92365df56f.tar.gz volse-hubzilla-e6ce2885c0b4586a270e0ace79598a92365df56f.tar.bz2 volse-hubzilla-e6ce2885c0b4586a270e0ace79598a92365df56f.zip |
Fix: Keep indentation in html and md code blocks.
Moves the logic for unwrapping broken lines in html (and Markdown) to
the node processing, instead of doing it over the full html content.
This allows us to skip it for code blocks (aka `<code>` elements within
`<pre>` elements).
-rw-r--r-- | include/html2bbcode.php | 33 | ||||
-rw-r--r-- | tests/unit/includes/BBCodeTest.php | 8 | ||||
-rw-r--r-- | tests/unit/includes/MarkdownTest.php | 4 |
3 files changed, 28 insertions, 17 deletions
diff --git a/include/html2bbcode.php b/include/html2bbcode.php index f75a3e428..8822d6f1d 100644 --- a/include/html2bbcode.php +++ b/include/html2bbcode.php @@ -65,6 +65,22 @@ function node2bbcodesub(&$doc, $oldnode, $attributes, $startbb, $endbb) if ($oldNode->hasChildNodes()) { foreach ($oldNode->childNodes as $child) { $newNode = $child->cloneNode(true); + + // Newlines are insignificant in HTML, but not so in BBCode, so let's + // unwrap the child nodes of when converting them. Also we compress + // consecutive whitespace chars to one. + // + // The exception is `<pre>` and `<code>` elements which + // should keep both newlines and whitespace intact. + if ($oldNode->nodeName != 'pre' && $oldNode->nodeName != 'code') { + $newNode->nodeValue = str_replace( + array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"), + array("<", ">", "<br />", " ", ""), + $newNode->nodeValue); + + $newNode->nodeValue = preg_replace('=[\s]{2,}=i', " ", $newNode->nodeValue); + } + $oldNode->parentNode->insertBefore($newNode, $oldNode); } } @@ -125,23 +141,6 @@ function html2bbcode($message) deletenode($doc, 'xml'); deletenode($doc, 'removeme'); - $xpath = new DomXPath($doc); - $list = $xpath->query("//pre"); - foreach ($list as $node) { - if ($node->hasChildNodes()) { - foreach ($node->childNodes as $child) { - $child->nodeValue = str_replace("\n", "\r", $child->nodeValue); - } - } else { - $node->nodeValue = str_replace("\n", "\r", $node->nodeValue); - } - } - - $message = $doc->saveHTML(); - $message = str_replace(array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"), array("<", ">", "<br />", " ", ""), $message); - $message = preg_replace('= [\s]*=i', " ", $message); - @$doc->loadHTML($message); - node2bbcode($doc, 'html', array(), "", ""); node2bbcode($doc, 'body', array(), "", ""); diff --git a/tests/unit/includes/BBCodeTest.php b/tests/unit/includes/BBCodeTest.php index 54c35e67d..035bcbdc7 100644 --- a/tests/unit/includes/BBCodeTest.php +++ b/tests/unit/includes/BBCodeTest.php @@ 
-37,6 +37,10 @@ class BBCodeTest extends UnitTestCase { private function html2bbcode_provider(): array { return [ + 'paragraph over multiple lines' => [ + "<p>A paragraph over\nmultiple lines\nshould be unwrapped</p>", + 'A paragraph over multiple lines should be unwrapped' + ], 'image with alt text' => [ '<img src="https://example.com/image.jpg" alt="Alt text">', '[img=https://example.com/image.jpg]Alt text[/img]' @@ -45,6 +49,10 @@ class BBCodeTest extends UnitTestCase { "<pre><code>some\ncode</code></pre>", "[code]some\ncode[/code]" ], + 'code block with indentation' => [ + "<pre><code>some\n indented\ncode</code></pre>", + "[code]some\n indented\ncode[/code]" + ], ]; } } diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php index 8025e8efa..c51fbfe16 100644 --- a/tests/unit/includes/MarkdownTest.php +++ b/tests/unit/includes/MarkdownTest.php @@ -77,6 +77,10 @@ class MarkdownTest extends UnitTestCase { "[code]some code\nover multiple lines[/code]", "```\nsome code\nover multiple lines\n```" ], + 'code block no language indented' => [ + "[code]some code\n over multiple lines\n with indentation[/code]", + "```\nsome code\n over multiple lines\n with indentation\n```" + ], ]; } |