From 0b371c8103b49a1bc9cde99fc13dabc330e9936c Mon Sep 17 00:00:00 2001 From: Mario Vavti Date: Tue, 30 Oct 2018 13:55:08 +0100 Subject: fix html2markdown() and re-enable previously failing tests --- include/markdown.php | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/markdown.php') diff --git a/include/markdown.php b/include/markdown.php index d2148811c..0947afeff 100644 --- a/include/markdown.php +++ b/include/markdown.php @@ -248,20 +248,12 @@ function bb_to_markdown($Text, $options = []) { // Convert it to HTML - don't try oembed $Text = bbcode($Text, [ 'tryoembed' => false ]); - // Markdownify does not preserve previously escaped html entities such as <> and &. - //$Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text); - // Now convert HTML to Markdown - $Text = html2markdown($Text); //html2markdown adds backslashes infront of hashes after a new line. remove them $Text = str_replace("\n\#", "\n#", $Text); - // It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason. - - //$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text); - // If the text going into bbcode() has a plain URL in it, i.e. // with no [url] tags around it, it will come out of parseString() // looking like: , which gets removed by strip_tags(). 
@@ -298,7 +290,8 @@ function html2markdown($html,$options = []) { $internal_errors = libxml_use_internal_errors(true); - $environment = Environment::createDefaultEnvironment($options); + $environment = new Environment($options); + $environment->createDefaultEnvironment(); $environment->addConverter(new TableConverter()); $converter = new HtmlConverter($environment); -- cgit v1.2.3 From c622ba84b9aadd4377ce5ea283121ac27726ea83 Mon Sep 17 00:00:00 2001 From: Mario Vavti Date: Tue, 30 Oct 2018 22:16:37 +0100 Subject: really fix html2markdown() - when using environment, we must set the defaults --- include/markdown.php | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/markdown.php') diff --git a/include/markdown.php b/include/markdown.php index 0947afeff..e7b57c3ee 100644 --- a/include/markdown.php +++ b/include/markdown.php @@ -288,10 +288,20 @@ function bb_to_markdown($Text, $options = []) { function html2markdown($html,$options = []) { $markdown = ''; - $internal_errors = libxml_use_internal_errors(true); + if(! $options) { + $options = [ + 'header_style' => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2 + 'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML + 'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output. + 'bold_style' => '**', // DEPRECATED: Set to '__' if you prefer the underlined style + 'italic_style' => '*', // DEPRECATED: Set to '_' if you prefer the underlined style + 'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script' + 'hard_break' => false, // Set to true to turn
<br> into `\n` instead of `  \n` + 'list_item_style' => '-', // Set the default character for each <li> in a <ul>
      . Can be '-', '*', or '+' + ]; + } - $environment = new Environment($options); - $environment->createDefaultEnvironment(); + $environment = Environment::createDefaultEnvironment($options); $environment->addConverter(new TableConverter()); $converter = new HtmlConverter($environment); @@ -301,8 +311,6 @@ function html2markdown($html,$options = []) { logger("Invalid HTML. HTMLToMarkdown library threw an exception."); } - libxml_use_internal_errors($internal_errors); - return $markdown; } -- cgit v1.2.3 From b4bac88c39c6bc6a0533114351ab9f5493aa46c9 Mon Sep 17 00:00:00 2001 From: Mario Vavti Date: Tue, 30 Oct 2018 22:37:41 +0100 Subject: fix markdown tests --- include/markdown.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/markdown.php') diff --git a/include/markdown.php b/include/markdown.php index e7b57c3ee..64f0a0854 100644 --- a/include/markdown.php +++ b/include/markdown.php @@ -283,9 +283,10 @@ function bb_to_markdown($Text, $options = []) { * If the HTML text can not get parsed it will return an empty string. * * @param string $html The HTML code to convert + * @param array $options an array of options to pass to the environment * @return string Markdown representation of the given HTML text, empty on error */ -function html2markdown($html,$options = []) { +function html2markdown($html, $options = []) { $markdown = ''; if(! $options) { -- cgit v1.2.3