From 6c79e0c077971029343b2dff30017571ea118438 Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Thu, 2 Mar 2017 23:25:04 +0100 Subject: :arrow_up: :hammer: Upgrade Markdownify library. The current version 2.0.0 (alpha) throws deprecated warning with PHP7.1 and PHPUnit. Upgrade the HTML to Markdown converter for PHP to the current Markdownify 2.2.1. Used composer to manage this library. --- vendor/pixel418/markdownify/src/Converter.php | 1400 ++++++++++++++++++++ vendor/pixel418/markdownify/src/ConverterExtra.php | 573 ++++++++ vendor/pixel418/markdownify/src/Parser.php | 564 ++++++++ 3 files changed, 2537 insertions(+) create mode 100644 vendor/pixel418/markdownify/src/Converter.php create mode 100644 vendor/pixel418/markdownify/src/ConverterExtra.php create mode 100644 vendor/pixel418/markdownify/src/Parser.php (limited to 'vendor/pixel418/markdownify/src') diff --git a/vendor/pixel418/markdownify/src/Converter.php b/vendor/pixel418/markdownify/src/Converter.php new file mode 100644 index 000000000..77c62dc7e --- /dev/null +++ b/vendor/pixel418/markdownify/src/Converter.php @@ -0,0 +1,1400 @@ + + */ + protected $notConverted = array(); + + /** + * skip conversion to markdown + * + * @var bool + */ + protected $skipConversion = false; + + /* options */ + + /** + * keep html tags which cannot be converted to markdown + * + * @var bool + */ + protected $keepHTML = false; + + /** + * wrap output, set to 0 to skip wrapping + * + * @var int + */ + protected $bodyWidth = 0; + + /** + * minimum body width + * + * @var int + */ + protected $minBodyWidth = 25; + + /** + * position where the link reference will be displayed + * + * + * @var int + */ + protected $linkPosition; + const LINK_AFTER_CONTENT = 0; + const LINK_AFTER_PARAGRAPH = 1; + const LINK_IN_PARAGRAPH = 2; + + /** + * stores current buffers + * + * @var array + */ + protected $buffer = array(); + + /** + * stores current buffers + * + * @var array + */ + protected $footnotes = array(); + + /** + * tags 
with elements which can be handled by markdown + * + * @var array + */ + protected $isMarkdownable = array( + 'p' => array(), + 'ul' => array(), + 'ol' => array(), + 'li' => array(), + 'br' => array(), + 'blockquote' => array(), + 'code' => array(), + 'pre' => array(), + 'a' => array( + 'href' => 'required', + 'title' => 'optional', + ), + 'strong' => array(), + 'b' => array(), + 'em' => array(), + 'i' => array(), + 'img' => array( + 'src' => 'required', + 'alt' => 'optional', + 'title' => 'optional', + ), + 'h1' => array(), + 'h2' => array(), + 'h3' => array(), + 'h4' => array(), + 'h5' => array(), + 'h6' => array(), + 'hr' => array(), + ); + + /** + * html tags to be ignored (contents will be parsed) + * + * @var array + */ + protected $ignore = array( + 'html', + 'body', + ); + + /** + * html tags to be dropped (contents will not be parsed!) + * + * @var array + */ + protected $drop = array( + 'script', + 'head', + 'style', + 'form', + 'area', + 'object', + 'param', + 'iframe', + ); + + /** + * html block tags that allow inline & block children + * + * @var array + */ + protected $allowMixedChildren = array( + 'li' + ); + + /** + * Markdown indents which could be wrapped + * @note: use strings in regex format + * + * @var array + */ + protected $wrappableIndents = array( + '\* ', // ul + '\d. ', // ol + '\d\d. ', // ol + '> ', // blockquote + '', // p + ); + + /** + * list of chars which have to be escaped in normal text + * @note: use strings in regex format + * + * @var array + * + * TODO: what's with block chars / sequences at the beginning of a block? + */ + protected $escapeInText = array( + '\*\*([^*]+)\*\*' => '\*\*$1\*\*', // strong + '\*([^*]+)\*' => '\*$1\*', // em + '__(?! |_)(.+)(?!<_| )__' => '\_\_$1\_\_', // strong + '_(?! 
|_)(.+)(?!<_| )_' => '\_$1\_', // em + '([-*_])([ ]{0,2}\1){2,}' => '\\\\$0', // hr + '`' => '\`', // code + '\[(.+)\](\s*\()' => '\[$1\]$2', // links: [text] (url) => [text\] (url) + '\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]', // links: [text][id] => [text\][id\] + '^#(#{0,5}) ' => '\#$1 ', // header + ); + + /** + * wether last processed node was a block tag or not + * + * @var bool + */ + protected $lastWasBlockTag = false; + + /** + * name of last closed tag + * + * @var string + */ + protected $lastClosedTag = ''; + + /** + * number of line breaks before next inline output + */ + protected $lineBreaks = 0; + + /** + * node stack, e.g. for and tags + * + * @var array + */ + protected $stack = array(); + + /** + * current indentation + * + * @var string + */ + protected $indent = ''; + + /** + * constructor, set options, setup parser + * + * @param int $linkPosition define the position of links + * @param int $bodyWidth whether or not to wrap the output to the given width + * defaults to false + * @param bool $keepHTML whether to keep non markdownable HTML or to discard it + * defaults to true (HTML will be kept) + * @return void + */ + public function __construct($linkPosition = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) + { + $this->linkPosition = $linkPosition; + $this->keepHTML = $keepHTML; + + if ($bodyWidth > $this->minBodyWidth) { + $this->bodyWidth = intval($bodyWidth); + } else { + $this->bodyWidth = false; + } + + $this->parser = new Parser; + $this->parser->noTagsInCode = true; + + // we don't have to do this every time + $search = array(); + $replace = array(); + foreach ($this->escapeInText as $s => $r) { + array_push($search, '@(?escapeInText = array( + 'search' => $search, + 'replace' => $replace + ); + } + + /** + * parse a HTML string + * + * @param string $html + * @return string markdown formatted + */ + public function parseString($html) + { + $this->resetState(); + + $this->parser->html = $html; + 
$this->parse(); + + return $this->output; + } + + /** + * set the position where the link reference will be displayed + * + * @param int $linkPosition + * @return void + */ + public function setLinkPosition($linkPosition) + { + $this->linkPosition = $linkPosition; + } + + /** + * set keep HTML tags which cannot be converted to markdown + * + * @param bool $linkPosition + * @return void + */ + public function setKeepHTML($keepHTML) + { + $this->keepHTML = $keepHTML; + } + + /** + * iterate through the nodes and decide what we + * shall do with the current node + * + * @param void + * @return void + */ + protected function parse() + { + $this->output = ''; + // drop tags + $this->parser->html = preg_replace('#<(' . implode('|', $this->drop) . ')[^>]*>.*#sU', '', $this->parser->html); + while ($this->parser->nextNode()) { + switch ($this->parser->nodeType) { + case 'doctype': + break; + case 'pi': + case 'comment': + if ($this->keepHTML) { + $this->flushLinebreaks(); + $this->out($this->parser->node); + $this->setLineBreaks(2); + } + // else drop + break; + case 'text': + $this->handleText(); + break; + case 'tag': + if (in_array($this->parser->tagName, $this->ignore)) { + break; + } + // If the previous tag was not a block element, we simulate a paragraph tag + if ($this->parser->isBlockElement && $this->parser->isNextToInlineContext && !in_array($this->parent(), $this->allowMixedChildren)) { + $this->setLineBreaks(2); + } + if ($this->parser->isStartTag) { + $this->flushLinebreaks(); + } + if ($this->skipConversion) { + $this->isMarkdownable(); // update notConverted + $this->handleTagToText(); + continue; + } + + // block elements + if (!$this->parser->keepWhitespace && $this->parser->isBlockElement) { + $this->fixBlockElementSpacing(); + } + + // inline elements + if (!$this->parser->keepWhitespace && $this->parser->isInlineContext) { + $this->fixInlineElementSpacing(); + } + + if ($this->isMarkdownable()) { + if ($this->parser->isBlockElement && 
$this->parser->isStartTag && !$this->lastWasBlockTag && !empty($this->output)) { + if (!empty($this->buffer)) { + $str =& $this->buffer[count($this->buffer) - 1]; + } else { + $str =& $this->output; + } + if (substr($str, -strlen($this->indent) - 1) != "\n" . $this->indent) { + $str .= "\n" . $this->indent; + } + } + $func = 'handleTag_' . $this->parser->tagName; + $this->$func(); + if ($this->linkPosition == self::LINK_AFTER_PARAGRAPH && $this->parser->isBlockElement && !$this->parser->isStartTag && empty($this->parser->openTags)) { + $this->flushFootnotes(); + } + if (!$this->parser->isStartTag) { + $this->lastClosedTag = $this->parser->tagName; + } + } else { + $this->handleTagToText(); + $this->lastClosedTag = ''; + } + break; + default: + trigger_error('invalid node type', E_USER_ERROR); + break; + } + $this->lastWasBlockTag = $this->parser->nodeType == 'tag' && $this->parser->isStartTag && $this->parser->isBlockElement; + } + if (!empty($this->buffer)) { + // trigger_error('buffer was not flushed, this is a bug. 
please report!', E_USER_WARNING); + while (!empty($this->buffer)) { + $this->out($this->unbuffer()); + } + } + // cleanup + $this->output = rtrim(str_replace('&', '&', str_replace('<', '<', str_replace('>', '>', $this->output)))); + // end parsing, flush stacked tags + $this->flushFootnotes(); + $this->stack = array(); + } + + /** + * check if current tag can be converted to Markdown + * + * @param void + * @return bool + */ + protected function isMarkdownable() + { + if (!isset($this->isMarkdownable[$this->parser->tagName])) { + // simply not markdownable + + return false; + } + if ($this->parser->isStartTag) { + $return = true; + if ($this->keepHTML) { + $diff = array_diff(array_keys($this->parser->tagAttributes), array_keys($this->isMarkdownable[$this->parser->tagName])); + if (!empty($diff)) { + // non markdownable attributes given + $return = false; + } + } + if ($return) { + foreach ($this->isMarkdownable[$this->parser->tagName] as $attr => $type) { + if ($type == 'required' && !isset($this->parser->tagAttributes[$attr])) { + // required markdown attribute not given + $return = false; + break; + } + } + } + if (!$return) { + array_push($this->notConverted, $this->parser->tagName . '::' . implode('/', $this->parser->openTags)); + } + + return $return; + } else { + if (!empty($this->notConverted) && end($this->notConverted) === $this->parser->tagName . '::' . implode('/', $this->parser->openTags)) { + array_pop($this->notConverted); + + return false; + } + + return true; + } + } + + /** + * output footnotes + * + * @param void + * @return void + */ + protected function flushFootnotes() + { + $out = false; + foreach ($this->footnotes as $k => $tag) { + if (!isset($tag['unstacked'])) { + if (!$out) { + $out = true; + $this->out("\n\n", true); + } else { + $this->out("\n", true); + } + $this->out(' [' . $tag['linkID'] . ']: ' . 
$this->getLinkReference($tag), true); + $tag['unstacked'] = true; + $this->footnotes[$k] = $tag; + } + } + } + + /** + * return formated link reference + * + * @param array $tag + * @return string link reference + */ + protected function getLinkReference($tag) + { + return $tag['href'] . (isset($tag['title']) ? ' "' . $tag['title'] . '"' : ''); + } + + /** + * flush enqued linebreaks + * + * @param void + * @return void + */ + protected function flushLinebreaks() + { + if ($this->lineBreaks && !empty($this->output)) { + $this->out(str_repeat("\n" . $this->indent, $this->lineBreaks), true); + } + $this->lineBreaks = 0; + } + + /** + * handle non Markdownable tags + * + * @param void + * @return void + */ + protected function handleTagToText() + { + if (!$this->keepHTML) { + if (!$this->parser->isStartTag && $this->parser->isBlockElement) { + $this->setLineBreaks(2); + } + } else { + // dont convert to markdown inside this tag + /** TODO: markdown extra **/ + if (!$this->parser->isEmptyTag) { + if ($this->parser->isStartTag) { + if (!$this->skipConversion) { + $this->skipConversion = $this->parser->tagName . '::' . implode('/', $this->parser->openTags); + } + } else { + if ($this->skipConversion == $this->parser->tagName . '::' . implode('/', $this->parser->openTags)) { + $this->skipConversion = false; + } + } + } + + if ($this->parser->isBlockElement) { + if ($this->parser->isStartTag) { + // looks like ins or del are block elements now + if (in_array($this->parent(), array('ins', 'del'))) { + $this->out("\n", true); + $this->indent(' '); + } + // don't indent inside
 tags
+                    if ($this->parser->tagName == 'pre') {
+                        $this->out($this->parser->node);
+                        static $indent;
+                        $indent = $this->indent;
+                        $this->indent = '';
+                    } else {
+                        $this->out($this->parser->node . "\n" . $this->indent);
+                        if (!$this->parser->isEmptyTag) {
+                            $this->indent('  ');
+                        } else {
+                            $this->setLineBreaks(1);
+                        }
+                        $this->parser->html = ltrim($this->parser->html);
+                    }
+                } else {
+                    if (!$this->parser->keepWhitespace) {
+                        $this->output = rtrim($this->output);
+                    }
+                    if ($this->parser->tagName != 'pre') {
+                        $this->indent('  ');
+                        $this->out("\n" . $this->indent . $this->parser->node);
+                    } else {
+                        // reset indentation
+                        $this->out($this->parser->node);
+                        static $indent;
+                        $this->indent = $indent;
+                    }
+
+                    if (in_array($this->parent(), array('ins', 'del'))) {
+                        // ins or del was block element
+                        $this->out("\n");
+                        $this->indent('  ');
+                    }
+                    if ($this->parser->tagName == 'li') {
+                        $this->setLineBreaks(1);
+                    } else {
+                        $this->setLineBreaks(2);
+                    }
+                }
+            } else {
+                $this->out($this->parser->node);
+            }
+            if (in_array($this->parser->tagName, array('code', 'pre'))) {
+                if ($this->parser->isStartTag) {
+                    $this->buffer();
+                } else {
+                    // add stuff so cleanup just reverses this
+                    $this->out(str_replace('<', '&lt;', str_replace('>', '&gt;', $this->unbuffer())));
+                }
+            }
+        }
+    }
+
+    /**
+     * handle plain text
+     *
+     * When hasParent('pre') is true and the text contains line breaks, the
+     * current indent is appended after every line break. Text that has neither
+     * a 'code' nor a 'pre' parent is passed through decode() and, unless
+     * skipConversion is set, through the escapeInText search/replace patterns.
+     * Text with a 'code' or 'pre' parent only has its quote entities replaced
+     * by literal quote characters. The result is written with out() and
+     * lastClosedTag is reset.
+     *
+     * @param void
+     * @return void
+     */
+    protected function handleText()
+    {
+        if ($this->hasParent('pre') && strpos($this->parser->node, "\n") !== false) {
+            $this->parser->node = str_replace("\n", "\n" . $this->indent, $this->parser->node);
+        }
+        if (!$this->hasParent('code') && !$this->hasParent('pre')) {
+            // entity decode
+            $this->parser->node = $this->decode($this->parser->node);
+            if (!$this->skipConversion) {
+                // escape some chars in normal Text
+                $this->parser->node = preg_replace($this->escapeInText['search'], $this->escapeInText['replace'], $this->parser->node);
+            }
+        } else {
+            // Replace the complete quote entities, terminating semicolon included.
+            // Matching '&apos' without the semicolon would leave a stray ';' behind,
+            // and replacing a literal '"' with itself would do nothing at all.
+            $this->parser->node = str_replace(array('&quot;', '&apos;'), array('"', '\''), $this->parser->node);
+        }
+        $this->out($this->parser->node);
+        $this->lastClosedTag = '';
+    }
+
+    /**
+     * handle <em> and <i> tags
+     *
+     * parse() dispatches here for every markdownable 'em' tag node, and
+     * handleTag_i() delegates here as well, so the same underscore delimiter
+     * is written for both the opening and the closing tag.
+     *
+     * @param void
+     * @return void
+     */
+    protected function handleTag_em()
+    {
+        $delimiter = '_';
+        $this->out($delimiter, true);
+    }
+
+    /**
+     * handle <i> tags by delegating to handleTag_em()
+     *
+     * @param void
+     * @return void
+     */
+    protected function handleTag_i()
+    {
+        $this->handleTag_em();
+    }
+
+    /**
+     * handle <strong> and <b> tags
+     *
+     * parse() dispatches here for every markdownable 'strong' tag node, and
+     * handleTag_b() delegates here as well, so the same double-asterisk
+     * delimiter is written for both the opening and the closing tag.
+     *
+     * @param void
+     * @return void
+     */
+    protected function handleTag_strong()
+    {
+        $delimiter = '**';
+        $this->out($delimiter, true);
+    }
+
+    /**
+     * handle <b> tags by delegating to handleTag_strong()
+     *
+     * @param void
+     * @return void
+     */
+    protected function handleTag_b()
+    {
+        $this->handleTag_strong();
+    }
+
+    /**
+     * handle 

tags + * + * @param void + * @return void + */ + protected function handleTag_h1() + { + $this->handleHeader(1); + } + + /** + * handle

tags + * + * @param void + * @return void + */ + protected function handleTag_h2() + { + $this->handleHeader(2); + } + + /** + * handle

tags + * + * @param void + * @return void + */ + protected function handleTag_h3() + { + $this->handleHeader(3); + } + + /** + * handle

tags + * + * @param void + * @return void + */ + protected function handleTag_h4() + { + $this->handleHeader(4); + } + + /** + * handle

tags + * + * @param void + * @return void + */ + protected function handleTag_h5() + { + $this->handleHeader(5); + } + + /** + * handle
tags + * + * @param void + * @return void + */ + protected function handleTag_h6() + { + $this->handleHeader(6); + } + + /** + * handle header tags (

-

) + * + * @param int $level 1-6 + * @return void + */ + protected function handleHeader($level) + { + if ($this->parser->isStartTag) { + $this->out(str_repeat('#', $level) . ' ', true); + } else { + $this->setLineBreaks(2); + } + } + + /** + * handle

tags + * + * @param void + * @return void + */ + protected function handleTag_p() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(2); + } + } + + /** + * handle tags + * + * @param void + * @return void + */ + protected function handleTag_a() + { + if ($this->parser->isStartTag) { + $this->buffer(); + $this->handleTag_a_parser(); + $this->stack(); + } else { + $tag = $this->unstack(); + $buffer = $this->unbuffer(); + $this->handleTag_a_converter($tag, $buffer); + $this->out($this->handleTag_a_converter($tag, $buffer), true); + } + } + + /** + * handle tags parsing + * + * @param void + * @return void + */ + protected function handleTag_a_parser() + { + if (isset($this->parser->tagAttributes['title'])) { + $this->parser->tagAttributes['title'] = $this->decode($this->parser->tagAttributes['title']); + } else { + $this->parser->tagAttributes['title'] = null; + } + $this->parser->tagAttributes['href'] = $this->decode(trim($this->parser->tagAttributes['href'])); + } + + /** + * handle tags conversion + * + * @param array $tag + * @param string $buffer + * @return string The markdownified link + */ + protected function handleTag_a_converter($tag, $buffer) + { + if (empty($tag['href']) && empty($tag['title'])) { + // empty links... testcase mania, who would possibly do anything like that?! + return '[' . $buffer . ']()'; + } + + if ($buffer == $tag['href'] && empty($tag['title'])) { + // + return '<' . $buffer . '>'; + } + + $bufferDecoded = $this->decode(trim($buffer)); + if (substr($tag['href'], 0, 7) == 'mailto:' && 'mailto:' . $bufferDecoded == $tag['href']) { + if (is_null($tag['title'])) { + // + return '<' . $bufferDecoded . '>'; + } + // [mail@example.com][1] + // ... + // [1]: mailto:mail@example.com Title + $tag['href'] = 'mailto:' . $bufferDecoded; + } + + if ($this->linkPosition == self::LINK_IN_PARAGRAPH) { + return '[' . $buffer . '](' . $this->getLinkReference($tag) . 
')'; + } + + // [This link][id] + foreach ($this->footnotes as $tag2) { + if ($tag2['href'] == $tag['href'] && $tag2['title'] === $tag['title']) { + $tag['linkID'] = $tag2['linkID']; + break; + } + } + if (!isset($tag['linkID'])) { + $tag['linkID'] = count($this->footnotes) + 1; + array_push($this->footnotes, $tag); + } + + return '[' . $buffer . '][' . $tag['linkID'] . ']'; + } + + /** + * handle tags + * + * @param void + * @return void + */ + protected function handleTag_img() + { + if (!$this->parser->isStartTag) { + return; // just to be sure this is really an empty tag... + } + + if (isset($this->parser->tagAttributes['title'])) { + $this->parser->tagAttributes['title'] = $this->decode($this->parser->tagAttributes['title']); + } else { + $this->parser->tagAttributes['title'] = null; + } + if (isset($this->parser->tagAttributes['alt'])) { + $this->parser->tagAttributes['alt'] = $this->decode($this->parser->tagAttributes['alt']); + } else { + $this->parser->tagAttributes['alt'] = null; + } + + if (empty($this->parser->tagAttributes['src'])) { + // support for "empty" images... dunno if this is really needed + // but there are some test cases which do that... + if (!empty($this->parser->tagAttributes['title'])) { + $this->parser->tagAttributes['title'] = ' ' . $this->parser->tagAttributes['title'] . ' '; + } + $this->out('![' . $this->parser->tagAttributes['alt'] . '](' . $this->parser->tagAttributes['title'] . ')', true); + + return; + } else { + $this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']); + } + + $out = '![' . $this->parser->tagAttributes['alt'] . ']'; + if ($this->linkPosition == self::LINK_IN_PARAGRAPH) { + $out .= '(' . $this->parser->tagAttributes['src']; + if ($this->parser->tagAttributes['title']) { + $out .= ' "' . $this->parser->tagAttributes['title'] . 
'"'; + } + $out .= ')'; + $this->out($out, true); + return; + } + + // ![This image][id] + $link_id = false; + if (!empty($this->footnotes)) { + foreach ($this->footnotes as $tag) { + if ($tag['href'] == $this->parser->tagAttributes['src'] + && $tag['title'] === $this->parser->tagAttributes['title'] + ) { + $link_id = $tag['linkID']; + break; + } + } + } + if (!$link_id) { + $link_id = count($this->footnotes) + 1; + $tag = array( + 'href' => $this->parser->tagAttributes['src'], + 'linkID' => $link_id, + 'title' => $this->parser->tagAttributes['title'] + ); + array_push($this->footnotes, $tag); + } + $out .= '[' . $link_id . ']'; + + $this->out($out, true); + } + + /** + * handle tags + * + * @param void + * @return void + */ + protected function handleTag_code() + { + if ($this->hasParent('pre')) { + // ignore code blocks inside

+
+            return;
+        }
+        if ($this->parser->isStartTag) {
+            $this->buffer();
+        } else {
+            $buffer = $this->unbuffer();
+            // use as many backticks as needed
+            preg_match_all('#`+#', $buffer, $matches);
+            if (!empty($matches[0])) {
+                rsort($matches[0]);
+
+                $ticks = '`';
+                while (true) {
+                    if (!in_array($ticks, $matches[0])) {
+                        break;
+                    }
+                    $ticks .= '`';
+                }
+            } else {
+                $ticks = '`';
+            }
+            if ($buffer[0] == '`' || substr($buffer, -1) == '`') {
+                $buffer = ' ' . $buffer . ' ';
+            }
+            $this->out($ticks . $buffer . $ticks, true);
+        }
+    }
+
+    /**
+     * handle <pre> tags
+     *
+     * With keepHTML enabled, an opening <pre> that is not immediately followed
+     * (ignoring whitespace) by a <code> start tag is handed to
+     * handleTagToText() instead of being converted. In every other case
+     * indent('    ') is called; a closing tag then queues two line breaks,
+     * while an opening tag strips leading whitespace from the remaining HTML.
+     *
+     * @param void
+     * @return void
+     */
+    protected function handleTag_pre()
+    {
+        if ($this->keepHTML && $this->parser->isStartTag) {
+            // check if a simple <code> follows
+            // (the pattern must name the tag: a bare '#^\s*#' matches any input,
+            // which would make this fallback unreachable)
+            if (!preg_match('#^\s*<code\s*>#Us', $this->parser->html)) {
+                // this is no standard markdown code block
+                $this->handleTagToText();
+
+                return;
+            }
+        }
+        $this->indent('    ');
+        if (!$this->parser->isStartTag) {
+            $this->setLineBreaks(2);
+        } else {
+            $this->parser->html = ltrim($this->parser->html);
+        }
+    }
+
+    /**
+     * handle 
tags + * + * @param void + * @return void + */ + protected function handleTag_blockquote() + { + $this->indent('> '); + } + + /** + * handle