diff options
author | Klaus Weidenbach <Klaus.Weidenbach@gmx.net> | 2017-05-24 23:47:03 +0200 |
---|---|---|
committer | Klaus Weidenbach <Klaus.Weidenbach@gmx.net> | 2017-05-27 00:19:01 +0200 |
commit | 0f0e23445ab00c49a09d3167ca220ac314722cfd (patch) | |
tree | 40ba53816e36d6157dd65bd6452171c8cde3fb87 | |
parent | 8ce98e38dc3fff9c38d1c458577b8bb9209e24c8 (diff) | |
download | volse-hubzilla-0f0e23445ab00c49a09d3167ca220ac314722cfd.tar.gz volse-hubzilla-0f0e23445ab00c49a09d3167ca220ac314722cfd.tar.bz2 volse-hubzilla-0f0e23445ab00c49a09d3167ca220ac314722cfd.zip |
:hammer::white_check_mark: Add html2markdown unit tests.
A tiny refactoring to make the HTML-to-Markdown conversion testable.
Add some unit tests to check the behavior of the HTML2Markdown library that is now in use.
There are some differences compared to the old pixel418/markdownify library.
-rw-r--r-- | include/markdown.php | 50 | ||||
-rw-r--r-- | tests/unit/includes/MarkdownTest.php | 149 | ||||
-rw-r--r-- | tests/unit/includes/TextTest.php | 18 |
3 files changed, 208 insertions, 9 deletions
diff --git a/include/markdown.php b/include/markdown.php index 39569a0f6..55ae528a4 100644 --- a/include/markdown.php +++ b/include/markdown.php @@ -453,15 +453,24 @@ function bb2diaspora_itembody($item, $force_update = false, $have_channel = fals return html_entity_decode($body); } +/** + * @brief Prepare bbcode for Diaspora. + * + * @hooks bb2diaspora + * * \e string The prepared text for diaspora. + * + * @param string $Text bbcode + * @param boolean $preserve_nl (default false) preserve new lines + * @param boolean $fordiaspora (default true, but unused) + * @return string + */ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) { // Re-enabling the converter again. // The bbcode parser now handles youtube-links (and the other stuff) correctly. // Additionally the html code is now fixed so that lists are now working. - /* - * Transform #tags, strip off the [url] and replace spaces with underscore - */ + // Transform #tags, strip off the [url] and replace spaces with underscore $Text = preg_replace_callback('/#\[([zu])rl\=(\w+.*?)\](\w+.*?)\[\/[(zu)]rl\]/i', create_function('$match', 'return \'#\'. str_replace(\' \', \'_\', $match[3]);' ), $Text); @@ -473,7 +482,6 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) { // strip map tags, as the rendering is performed in bbcode() and the resulting output // is not compatible with Diaspora (at least in the case of openstreetmap and probably // due to the inclusion of an html iframe) - $Text = preg_replace("/\[map\=(.*?)\]/ism", '$1', $Text); $Text = preg_replace("/\[map\](.*?)\[\/map\]/ism", '$1', $Text); @@ -491,15 +499,12 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) { $Text = bbcode($Text, $preserve_nl, false); // Markdownify does not preserve previously escaped html entities such as <> and &. 
- $Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text); // Now convert HTML to Markdown - $md = new HtmlConverter(); - $Text = $md->convert($Text); + $Text = html2markdown($Text); // It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason. - $Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text); // If the text going into bbcode() has a plain URL in it, i.e. @@ -516,7 +521,6 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) { // Remove any leading or trailing whitespace, as this will mess up // the Diaspora signature verification and cause the item to disappear - $Text = trim($Text); call_hooks('bb2diaspora', $Text); @@ -563,3 +567,31 @@ function format_event_diaspora($ev) { return $o; } + +/** + * @brief Convert a HTML text into Markdown. + * + * This function uses the library league/html-to-markdown for this task. + * + * If the HTML text can not get parsed it will return an empty string. + * + * @see HTMLToMarkdown + * + * @param string $html The HTML code to convert + * @return string Markdown representation of the given HTML text, empty on error + */ +function html2markdown(String $html) : String { + $markdown = ''; + $converter = new HtmlConverter(); + + try { + $markdown = $converter->convert($html); + } catch (InvalidArgumentException $e) { + logger("Invalid HTML. 
HTMLToMarkdown library threw an exception."); + } + + // The old html 2 markdown library "pixel418/markdownify": "^2.2", + //$md = new HtmlConverter(); + //$markdown = $md->convert($Text); + return $markdown; +} diff --git a/tests/unit/includes/MarkdownTest.php b/tests/unit/includes/MarkdownTest.php new file mode 100644 index 000000000..3026c633a --- /dev/null +++ b/tests/unit/includes/MarkdownTest.php @@ -0,0 +1,149 @@ +<?php +/* + * Copyright (c) 2017 Hubzilla +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +namespace Zotlabs\Tests\Unit\includes; + +use Zotlabs\Tests\Unit\UnitTestCase; +use phpmock\phpunit\PHPMock; + +require_once 'include/markdown.php'; + +/** + * @brief Unit Test case for markdown functions. 
+ */ +class MarkdownTest extends UnitTestCase { + use PHPMock; + + /** + * @covers ::html2markdown + * @dataProvider html2markdownProvider + */ + public function testHtml2markdown($html, $markdown) { + $this->assertEquals($markdown, html2markdown($html)); + } + + public function html2markdownProvider() { + return [ + 'empty text' => [ + '', + '' + ], + 'space and nbsp only' => [ + ' ', + '' + ], + 'strong, b, em, i, bib' => [ + '<strong>strong</strong> <b>bold</b> <em>em</em> <i>italic</i> <b>bo<i>italic</i>ld</b>', + '**strong** **bold** _em_ _italic_ **bo_italic_ld**' + ], + 'empty tags' => [ + 'text1 <b></b> text2 <i></i>', + 'text1 text2' + ], + 'HTML entities, lt does not work' => [ + '& gt > lt <', + '& gt > lt' + ], + 'escaped HTML entities' => [ + '& lt < gt >', + '& lt < gt >' + ], + 'our escaped HTML entities' => [ + '&_lt_; &_gt_; &_amp_;', + '&\_lt\_; &\_gt\_; &\_amp\_;' + ], + 'linebreak' => [ + "line1<br>line2\nline3", + "line1 \nline2 line3" + ], + 'headlines' => [ + '<h1>header1</h1><h3>Header 3</h3>', + "header1\n=======\n\n### Header 3" + ], + 'unordered list' => [ + '<ul><li>Item 1</li><li>Item 2</li><li>Item <b>3</b></li></ul>', + "- Item 1\n- Item 2\n- Item **3**" + ], + 'ordered list' => [ + '<ol><li>Item 1</li><li>Item 2</li><li>Item <b>3</b></li></ol>', + "1. Item 1\n2. Item 2\n3. Item **3**" + ], + 'nested lists' => [ + '<ul><li>Item 1<ol><li>Item 1a</li><li>Item <b>1b</b></ol></li><li>Item 2</li></ul>', + "- Item 1\n 1. Item 1a\n 2. 
Item **1b**\n- Item 2" + ], + 'img' => [ + '<img src="/path/to/img.png" alt="alt text" title="title text">', + '![alt text](/path/to/img.png "title text")' + ], + 'link' => [ + '<a href="http://hubzilla.org" title="Hubzilla">link</a>', + '[link](http://hubzilla.org "Hubzilla")' + ], + 'img link' => [ + '<a href="http://hubzilla.org" title="Hubzilla"><img src="/img/hubzilla.png" alt="alt img text" title="img title"></a>', + '[![alt img text](/img/hubzilla.png "img title")](http://hubzilla.org "Hubzilla")' + ], + 'script' => [ + "<script>alert('test');</script>", + "<script>alert('test');</script>" + ], + 'blockquote, issue #793' => [ + '<blockquote>something</blockquote>blah', + "> something\n\nblah" + ], + 'code' => [ + '<code><p>HTML text</p></code>', + '`<p>HTML text</p>`' + ], + 'pre' => [ + '<pre> line with spaces </pre>', + '` line with spaces `' + ], + 'div p' => [ + '<div>div</div><div><p>p</p></div>', + "<div>div</div><div>p\n\n</div>" + ] + ]; + } + + /*public function testHtml2markdownException() { + //$this->expectException(\InvalidArgumentException::class); + // need to stub logger() for this to work + $this->assertEquals('', html2markdown('<<invalid')); + }*/ + +/* public function testBB2diasporaMardown() { + //stub bbcode() and return our HTML, we just need to test the HTML2Markdown library. + $html1 = 'test<b>bold</b><br><i>i</i><ul><li>li1</li><li>li2</li></ul><br>'; + $bb1 = 'test'; + + // php-mock can not mock global functions which is called by a global function. + // If the calling function is in a namespace it does work. + $bbc = $this->getFunctionMock(__NAMESPACE__, "bbcode"); + $bbc->expects($this->once())->willReturn('test<b>bold</b><br><i>i</i><ul><li>li1</li><li>li2</li></ul><br>'); + + $this->assertEquals($bb1, bb2diaspora($html1)); + } +*/ +}
\ No newline at end of file diff --git a/tests/unit/includes/TextTest.php b/tests/unit/includes/TextTest.php index e2c7cbb9a..4afa2b49b 100644 --- a/tests/unit/includes/TextTest.php +++ b/tests/unit/includes/TextTest.php @@ -11,6 +11,9 @@ use Zotlabs\Tests\Unit\UnitTestCase; */ class TextTest extends UnitTestCase { + /** + * @covers ::valid_email_regex + */ public function testGoodEmail() { $this->assertTrue(valid_email_regex('ken@spaz.org')); $this->assertTrue(valid_email_regex('ken@restivo.org')); @@ -18,11 +21,17 @@ class TextTest extends UnitTestCase { $this->assertTrue(valid_email_regex('foo+nobody@hubzilla.org')); } + /** + * @covers ::valid_email_regex + */ public function testBadEmail() { $this->assertFalse(valid_email_regex('nobody!uses!these!any.more')); $this->assertFalse(valid_email_regex('foo@bar@hubzilla.org')); } + /** + * @covers ::purify_html + */ public function testPurifyHTML() { // linebreaks $htmlbr = 'first line<br /> @@ -46,6 +55,9 @@ empty line above'; $this->assertEquals('<ul><li>item1</li></ul>', purify_html('<ul data-accordion-menu-unknown><li>item1</li></ul>')); } + /** + * @covers ::purify_html + */ public function testPurifyHTML_html() { $this->assertEquals('<div id="id01"><p class="class01">ids und classes</p></div>', purify_html('<div id="id01"><p class="class01">ids und classes</p></div>')); $this->assertEquals('<div><p>close missing tags</p></div>', purify_html('<div><p>close missing tags')); @@ -59,6 +71,9 @@ empty line above'; $this->assertEquals('', purify_html('<iframe width="560" height="315" src="https://www.youtube.com/embed/kiNGx5oL7hk" frameborder="0" allowfullscreen></iframe>')); } + /** + * @covers ::purify_html + */ public function testPurifyHTML_js() { $this->assertEquals('<div></div>', purify_html('<div><img src="javascript:evil();" onload="evil();"></div>')); $this->assertEquals('<a href="#">link</a>', purify_html('<a href="#" onclick="alert(\'xss\')">link</a>')); @@ -66,6 +81,9 @@ empty line above'; 
$this->assertEquals('', purify_html('<script>alter("42")</script>')); } + /** + * @covers ::purify_html + */ public function testPurifyHTML_css() { $this->assertEquals('<p style="color:#FF0000;background-color:#fff;">red</p>', purify_html('<p style="color:red; background-color:#fff">red</p>')); $this->assertEquals('<p>invalid color</p>', purify_html('<p style="color:invalid; background-color:#jjkkmm">invalid color</p>')); |