aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Eilertsen <haraldei@anduin.net>2024-02-06 21:23:51 +0100
committerHarald Eilertsen <haraldei@anduin.net>2024-02-07 15:54:40 +0100
commite6ce2885c0b4586a270e0ace79598a92365df56f (patch)
tree6bb17f31c8334c34f573993c1d0e1291cc670849
parentec19ee9d82a9d06e5b86fcb58329767226b0676f (diff)
downloadvolse-hubzilla-e6ce2885c0b4586a270e0ace79598a92365df56f.tar.gz
volse-hubzilla-e6ce2885c0b4586a270e0ace79598a92365df56f.tar.bz2
volse-hubzilla-e6ce2885c0b4586a270e0ace79598a92365df56f.zip
Fix: Keep indentation in html and md code blocks.
Moves the logic for unwrapping broken lines in html (and Markdown) to the node processing, instead of doing it over the full html content. This allows us to skip it for code blocks (aka `<code>` elements within `<pre>` elements).
-rw-r--r--include/html2bbcode.php33
-rw-r--r--tests/unit/includes/BBCodeTest.php8
-rw-r--r--tests/unit/includes/MarkdownTest.php4
3 files changed, 28 insertions, 17 deletions
diff --git a/include/html2bbcode.php b/include/html2bbcode.php
index f75a3e428..8822d6f1d 100644
--- a/include/html2bbcode.php
+++ b/include/html2bbcode.php
@@ -65,6 +65,22 @@ function node2bbcodesub(&$doc, $oldnode, $attributes, $startbb, $endbb)
if ($oldNode->hasChildNodes()) {
foreach ($oldNode->childNodes as $child) {
$newNode = $child->cloneNode(true);
+
+ // Newlines are insignificant in HTML, but not so in BBCode, so let's
+ // unwrap the child nodes when converting them. Also we compress
+ // consecutive whitespace chars to one.
+ //
+ // The exception is `<pre>` and `<code>` elements which
+ // should keep both newlines and whitespace intact.
+ if ($oldNode->nodeName != 'pre' && $oldNode->nodeName != 'code') {
+ $newNode->nodeValue = str_replace(
+ array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"),
+ array("<", ">", "<br />", " ", ""),
+ $newNode->nodeValue);
+
+ $newNode->nodeValue = preg_replace('=[\s]{2,}=i', " ", $newNode->nodeValue);
+ }
+
$oldNode->parentNode->insertBefore($newNode, $oldNode);
}
}
@@ -125,23 +141,6 @@ function html2bbcode($message)
deletenode($doc, 'xml');
deletenode($doc, 'removeme');
- $xpath = new DomXPath($doc);
- $list = $xpath->query("//pre");
- foreach ($list as $node) {
- if ($node->hasChildNodes()) {
- foreach ($node->childNodes as $child) {
- $child->nodeValue = str_replace("\n", "\r", $child->nodeValue);
- }
- } else {
- $node->nodeValue = str_replace("\n", "\r", $node->nodeValue);
- }
- }
-
- $message = $doc->saveHTML();
- $message = str_replace(array("\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"), array("<", ">", "<br />", " ", ""), $message);
- $message = preg_replace('= [\s]*=i', " ", $message);
- @$doc->loadHTML($message);
-
node2bbcode($doc, 'html', array(), "", "");
node2bbcode($doc, 'body', array(), "", "");
diff --git a/tests/unit/includes/BBCodeTest.php b/tests/unit/includes/BBCodeTest.php
index 54c35e67d..035bcbdc7 100644
--- a/tests/unit/includes/BBCodeTest.php
+++ b/tests/unit/includes/BBCodeTest.php
@@ -37,6 +37,10 @@ class BBCodeTest extends UnitTestCase {
private function html2bbcode_provider(): array {
return [
+ 'paragraph over multiple lines' => [
+ "<p>A paragraph over\nmultiple lines\nshould be unwrapped</p>",
+ 'A paragraph over multiple lines should be unwrapped'
+ ],
'image with alt text' => [
'<img src="https://example.com/image.jpg" alt="Alt text">',
'[img=https://example.com/image.jpg]Alt text[/img]'
@@ -45,6 +49,10 @@ class BBCodeTest extends UnitTestCase {
"<pre><code>some\ncode</code></pre>",
"[code]some\ncode[/code]"
],
+ 'code block with indentation' => [
+ "<pre><code>some\n indented\ncode</code></pre>",
+ "[code]some\n indented\ncode[/code]"
+ ],
];
}
}
diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php
index 8025e8efa..c51fbfe16 100644
--- a/tests/unit/includes/MarkdownTest.php
+++ b/tests/unit/includes/MarkdownTest.php
@@ -77,6 +77,10 @@ class MarkdownTest extends UnitTestCase {
"[code]some code\nover multiple lines[/code]",
"```\nsome code\nover multiple lines\n```"
],
+ 'code block no language indented' => [
+ "[code]some code\n over multiple lines\n with indentation[/code]",
+ "```\nsome code\n over multiple lines\n with indentation\n```"
+ ],
];
}