aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorKlaus Weidenbach <Klaus.Weidenbach@gmx.net>2017-05-24 23:47:03 +0200
committerKlaus Weidenbach <Klaus.Weidenbach@gmx.net>2017-05-27 00:19:01 +0200
commit0f0e23445ab00c49a09d3167ca220ac314722cfd (patch)
tree40ba53816e36d6157dd65bd6452171c8cde3fb87 /include
parent8ce98e38dc3fff9c38d1c458577b8bb9209e24c8 (diff)
downloadvolse-hubzilla-0f0e23445ab00c49a09d3167ca220ac314722cfd.tar.gz
volse-hubzilla-0f0e23445ab00c49a09d3167ca220ac314722cfd.tar.bz2
volse-hubzilla-0f0e23445ab00c49a09d3167ca220ac314722cfd.zip
:hammer::white_check_mark: Add html2markdown unit tests.
A tiny refactoring to make the HTML-to-Markdown conversion testable. Add some unit tests to check the behavior of the HTML2Markdown library that is now in use. There are some differences compared to the old pixel418/markdownify library.
Diffstat (limited to 'include')
-rw-r--r--include/markdown.php50
1 files changed, 41 insertions, 9 deletions
diff --git a/include/markdown.php b/include/markdown.php
index 39569a0f6..55ae528a4 100644
--- a/include/markdown.php
+++ b/include/markdown.php
@@ -453,15 +453,24 @@ function bb2diaspora_itembody($item, $force_update = false, $have_channel = fals
return html_entity_decode($body);
}
+/**
+ * @brief Prepare bbcode for Diaspora.
+ *
+ * @hooks bb2diaspora
+ * * \e string The prepared text for diaspora.
+ *
+ * @param string $Text bbcode
+ * @param boolean $preserve_nl (default false) preserve new lines
+ * @param boolean $fordiaspora (default true, but unused)
+ * @return string
+ */
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
// Re-enabling the converter again.
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
// Additionally the html code is now fixed so that lists are now working.
- /*
- * Transform #tags, strip off the [url] and replace spaces with underscore
- */
+ // Transform #tags, strip off the [url] and replace spaces with underscore
$Text = preg_replace_callback('/#\[([zu])rl\=(\w+.*?)\](\w+.*?)\[\/[(zu)]rl\]/i', create_function('$match',
'return \'#\'. str_replace(\' \', \'_\', $match[3]);'
), $Text);
@@ -473,7 +482,6 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
// strip map tags, as the rendering is performed in bbcode() and the resulting output
// is not compatible with Diaspora (at least in the case of openstreetmap and probably
// due to the inclusion of an html iframe)
-
$Text = preg_replace("/\[map\=(.*?)\]/ism", '$1', $Text);
$Text = preg_replace("/\[map\](.*?)\[\/map\]/ism", '$1', $Text);
@@ -491,15 +499,12 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
$Text = bbcode($Text, $preserve_nl, false);
// Markdownify does not preserve previously escaped html entities such as <> and &.
-
$Text = str_replace(array('&lt;','&gt;','&amp;'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
// Now convert HTML to Markdown
- $md = new HtmlConverter();
- $Text = $md->convert($Text);
+ $Text = html2markdown($Text);
// It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason.
-
$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('&lt;','&gt;','&amp;'),$Text);
// If the text going into bbcode() has a plain URL in it, i.e.
@@ -516,7 +521,6 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
// Remove any leading or trailing whitespace, as this will mess up
// the Diaspora signature verification and cause the item to disappear
-
$Text = trim($Text);
call_hooks('bb2diaspora', $Text);
@@ -563,3 +567,31 @@ function format_event_diaspora($ev) {
return $o;
}
+
+/**
+ * @brief Convert a HTML text into Markdown.
+ *
+ * This function uses the library league/html-to-markdown for this task.
+ *
+ * If the converter rejects the HTML text with an InvalidArgumentException,
+ * the failure (including the exception message) is logged and an empty
+ * string is returned instead.
+ *
+ * This library replaced the previously used "pixel418/markdownify": "^2.2".
+ *
+ * @see HTMLToMarkdown
+ *
+ * @param string $html The HTML code to convert
+ * @return string Markdown representation of the given HTML text, empty on error
+ */
+function html2markdown(string $html) : string {
+	$converter = new HtmlConverter();
+
+	try {
+		return $converter->convert($html);
+	} catch (InvalidArgumentException $e) {
+		// Keep the reason in the log so a failed conversion can be diagnosed.
+		logger('Invalid HTML. HTMLToMarkdown library threw an exception: ' . $e->getMessage());
+	}
+
+	// Conversion failed: callers get an empty string rather than an exception.
+	return '';
+}