From 547df2219ab4b870256f2ed90e36b97d8bf200bf Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Tue, 23 May 2017 00:32:11 +0200 Subject: Replace Mardownify library with html-to-markdown library. --- vendor/pixel418/markdownify/src/Converter.php | 1400 -------------------- vendor/pixel418/markdownify/src/ConverterExtra.php | 573 -------- vendor/pixel418/markdownify/src/Parser.php | 564 -------- 3 files changed, 2537 deletions(-) delete mode 100644 vendor/pixel418/markdownify/src/Converter.php delete mode 100644 vendor/pixel418/markdownify/src/ConverterExtra.php delete mode 100644 vendor/pixel418/markdownify/src/Parser.php (limited to 'vendor/pixel418/markdownify/src') diff --git a/vendor/pixel418/markdownify/src/Converter.php b/vendor/pixel418/markdownify/src/Converter.php deleted file mode 100644 index 77c62dc7e..000000000 --- a/vendor/pixel418/markdownify/src/Converter.php +++ /dev/null @@ -1,1400 +0,0 @@ - - */ - protected $notConverted = array(); - - /** - * skip conversion to markdown - * - * @var bool - */ - protected $skipConversion = false; - - /* options */ - - /** - * keep html tags which cannot be converted to markdown - * - * @var bool - */ - protected $keepHTML = false; - - /** - * wrap output, set to 0 to skip wrapping - * - * @var int - */ - protected $bodyWidth = 0; - - /** - * minimum body width - * - * @var int - */ - protected $minBodyWidth = 25; - - /** - * position where the link reference will be displayed - * - * - * @var int - */ - protected $linkPosition; - const LINK_AFTER_CONTENT = 0; - const LINK_AFTER_PARAGRAPH = 1; - const LINK_IN_PARAGRAPH = 2; - - /** - * stores current buffers - * - * @var array - */ - protected $buffer = array(); - - /** - * stores current buffers - * - * @var array - */ - protected $footnotes = array(); - - /** - * tags with elements which can be handled by markdown - * - * @var array - */ - protected $isMarkdownable = array( - 'p' => array(), - 'ul' => array(), - 'ol' => array(), - 'li' => array(), - 'br' => array(), - 'blockquote' => array(), - 'code' => array(), - 'pre' => array(), - 'a' => array( - 'href' => 'required', - 'title' => 'optional', - ), - 'strong' => array(), - 'b' => array(), - 'em' => array(), - 'i' => array(), - 'img' => array( - 'src' => 'required', - 'alt' => 'optional', - 'title' => 'optional', - ), - 'h1' => array(), - 'h2' => array(), - 'h3' => array(), - 'h4' => array(), - 'h5' => array(), - 'h6' => array(), - 'hr' => array(), - ); - - /** - * html tags to be ignored (contents will be parsed) - * - * @var array - */ - protected $ignore = array( - 'html', - 'body', - ); - - /** - * html tags to be dropped (contents will not be parsed!) - * - * @var array - */ - protected $drop = array( - 'script', - 'head', - 'style', - 'form', - 'area', - 'object', - 'param', - 'iframe', - ); - - /** - * html block tags that allow inline & block children - * - * @var array - */ - protected $allowMixedChildren = array( - 'li' - ); - - /** - * Markdown indents which could be wrapped - * @note: use strings in regex format - * - * @var array - */ - protected $wrappableIndents = array( - '\* ', // ul - '\d. ', // ol - '\d\d. ', // ol - '> ', // blockquote - '', // p - ); - - /** - * list of chars which have to be escaped in normal text - * @note: use strings in regex format - * - * @var array - * - * TODO: what's with block chars / sequences at the beginning of a block? - */ - protected $escapeInText = array( - '\*\*([^*]+)\*\*' => '\*\*$1\*\*', // strong - '\*([^*]+)\*' => '\*$1\*', // em - '__(?! |_)(.+)(?!<_| )__' => '\_\_$1\_\_', // strong - '_(?! |_)(.+)(?!<_| )_' => '\_$1\_', // em - '([-*_])([ ]{0,2}\1){2,}' => '\\\\$0', // hr - '`' => '\`', // code - '\[(.+)\](\s*\()' => '\[$1\]$2', // links: [text] (url) => [text\] (url) - '\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]', // links: [text][id] => [text\][id\] - '^#(#{0,5}) ' => '\#$1 ', // header - ); - - /** - * wether last processed node was a block tag or not - * - * @var bool - */ - protected $lastWasBlockTag = false; - - /** - * name of last closed tag - * - * @var string - */ - protected $lastClosedTag = ''; - - /** - * number of line breaks before next inline output - */ - protected $lineBreaks = 0; - - /** - * node stack, e.g. for and tags - * - * @var array - */ - protected $stack = array(); - - /** - * current indentation - * - * @var string - */ - protected $indent = ''; - - /** - * constructor, set options, setup parser - * - * @param int $linkPosition define the position of links - * @param int $bodyWidth whether or not to wrap the output to the given width - * defaults to false - * @param bool $keepHTML whether to keep non markdownable HTML or to discard it - * defaults to true (HTML will be kept) - * @return void - */ - public function __construct($linkPosition = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) - { - $this->linkPosition = $linkPosition; - $this->keepHTML = $keepHTML; - - if ($bodyWidth > $this->minBodyWidth) { - $this->bodyWidth = intval($bodyWidth); - } else { - $this->bodyWidth = false; - } - - $this->parser = new Parser; - $this->parser->noTagsInCode = true; - - // we don't have to do this every time - $search = array(); - $replace = array(); - foreach ($this->escapeInText as $s => $r) { - array_push($search, '@(?escapeInText = array( - 'search' => $search, - 'replace' => $replace - ); - } - - /** - * parse a HTML string - * - * @param string $html - * @return string markdown formatted - */ - public function parseString($html) - { - $this->resetState(); - - $this->parser->html = $html; - $this->parse(); - - return $this->output; - } - - /** - * set the position where the link reference will be displayed - * - * @param int $linkPosition - * @return void - */ - public function setLinkPosition($linkPosition) - { - $this->linkPosition = $linkPosition; - } - - /** - * set keep HTML tags which cannot be converted to markdown - * - * @param bool $linkPosition - * @return void - */ - public function setKeepHTML($keepHTML) - { - $this->keepHTML = $keepHTML; - } - - /** - * iterate through the nodes and decide what we - * shall do with the current node - * - * @param void - * @return void - */ - protected function parse() - { - $this->output = ''; - // drop tags - $this->parser->html = preg_replace('#<(' . implode('|', $this->drop) . ')[^>]*>.*#sU', '', $this->parser->html); - while ($this->parser->nextNode()) { - switch ($this->parser->nodeType) { - case 'doctype': - break; - case 'pi': - case 'comment': - if ($this->keepHTML) { - $this->flushLinebreaks(); - $this->out($this->parser->node); - $this->setLineBreaks(2); - } - // else drop - break; - case 'text': - $this->handleText(); - break; - case 'tag': - if (in_array($this->parser->tagName, $this->ignore)) { - break; - } - // If the previous tag was not a block element, we simulate a paragraph tag - if ($this->parser->isBlockElement && $this->parser->isNextToInlineContext && !in_array($this->parent(), $this->allowMixedChildren)) { - $this->setLineBreaks(2); - } - if ($this->parser->isStartTag) { - $this->flushLinebreaks(); - } - if ($this->skipConversion) { - $this->isMarkdownable(); // update notConverted - $this->handleTagToText(); - continue; - } - - // block elements - if (!$this->parser->keepWhitespace && $this->parser->isBlockElement) { - $this->fixBlockElementSpacing(); - } - - // inline elements - if (!$this->parser->keepWhitespace && $this->parser->isInlineContext) { - $this->fixInlineElementSpacing(); - } - - if ($this->isMarkdownable()) { - if ($this->parser->isBlockElement && $this->parser->isStartTag && !$this->lastWasBlockTag && !empty($this->output)) { - if (!empty($this->buffer)) { - $str =& $this->buffer[count($this->buffer) - 1]; - } else { - $str =& $this->output; - } - if (substr($str, -strlen($this->indent) - 1) != "\n" . $this->indent) { - $str .= "\n" . $this->indent; - } - } - $func = 'handleTag_' . $this->parser->tagName; - $this->$func(); - if ($this->linkPosition == self::LINK_AFTER_PARAGRAPH && $this->parser->isBlockElement && !$this->parser->isStartTag && empty($this->parser->openTags)) { - $this->flushFootnotes(); - } - if (!$this->parser->isStartTag) { - $this->lastClosedTag = $this->parser->tagName; - } - } else { - $this->handleTagToText(); - $this->lastClosedTag = ''; - } - break; - default: - trigger_error('invalid node type', E_USER_ERROR); - break; - } - $this->lastWasBlockTag = $this->parser->nodeType == 'tag' && $this->parser->isStartTag && $this->parser->isBlockElement; - } - if (!empty($this->buffer)) { - // trigger_error('buffer was not flushed, this is a bug. please report!', E_USER_WARNING); - while (!empty($this->buffer)) { - $this->out($this->unbuffer()); - } - } - // cleanup - $this->output = rtrim(str_replace('&', '&', str_replace('<', '<', str_replace('>', '>', $this->output)))); - // end parsing, flush stacked tags - $this->flushFootnotes(); - $this->stack = array(); - } - - /** - * check if current tag can be converted to Markdown - * - * @param void - * @return bool - */ - protected function isMarkdownable() - { - if (!isset($this->isMarkdownable[$this->parser->tagName])) { - // simply not markdownable - - return false; - } - if ($this->parser->isStartTag) { - $return = true; - if ($this->keepHTML) { - $diff = array_diff(array_keys($this->parser->tagAttributes), array_keys($this->isMarkdownable[$this->parser->tagName])); - if (!empty($diff)) { - // non markdownable attributes given - $return = false; - } - } - if ($return) { - foreach ($this->isMarkdownable[$this->parser->tagName] as $attr => $type) { - if ($type == 'required' && !isset($this->parser->tagAttributes[$attr])) { - // required markdown attribute not given - $return = false; - break; - } - } - } - if (!$return) { - array_push($this->notConverted, $this->parser->tagName . '::' . implode('/', $this->parser->openTags)); - } - - return $return; - } else { - if (!empty($this->notConverted) && end($this->notConverted) === $this->parser->tagName . '::' . implode('/', $this->parser->openTags)) { - array_pop($this->notConverted); - - return false; - } - - return true; - } - } - - /** - * output footnotes - * - * @param void - * @return void - */ - protected function flushFootnotes() - { - $out = false; - foreach ($this->footnotes as $k => $tag) { - if (!isset($tag['unstacked'])) { - if (!$out) { - $out = true; - $this->out("\n\n", true); - } else { - $this->out("\n", true); - } - $this->out(' [' . $tag['linkID'] . ']: ' . $this->getLinkReference($tag), true); - $tag['unstacked'] = true; - $this->footnotes[$k] = $tag; - } - } - } - - /** - * return formated link reference - * - * @param array $tag - * @return string link reference - */ - protected function getLinkReference($tag) - { - return $tag['href'] . (isset($tag['title']) ? ' "' . $tag['title'] . '"' : ''); - } - - /** - * flush enqued linebreaks - * - * @param void - * @return void - */ - protected function flushLinebreaks() - { - if ($this->lineBreaks && !empty($this->output)) { - $this->out(str_repeat("\n" . $this->indent, $this->lineBreaks), true); - } - $this->lineBreaks = 0; - } - - /** - * handle non Markdownable tags - * - * @param void - * @return void - */ - protected function handleTagToText() - { - if (!$this->keepHTML) { - if (!$this->parser->isStartTag && $this->parser->isBlockElement) { - $this->setLineBreaks(2); - } - } else { - // dont convert to markdown inside this tag - /** TODO: markdown extra **/ - if (!$this->parser->isEmptyTag) { - if ($this->parser->isStartTag) { - if (!$this->skipConversion) { - $this->skipConversion = $this->parser->tagName . '::' . implode('/', $this->parser->openTags); - } - } else { - if ($this->skipConversion == $this->parser->tagName . '::' . implode('/', $this->parser->openTags)) { - $this->skipConversion = false; - } - } - } - - if ($this->parser->isBlockElement) { - if ($this->parser->isStartTag) { - // looks like ins or del are block elements now - if (in_array($this->parent(), array('ins', 'del'))) { - $this->out("\n", true); - $this->indent(' '); - } - // don't indent inside
 tags
-                    if ($this->parser->tagName == 'pre') {
-                        $this->out($this->parser->node);
-                        static $indent;
-                        $indent = $this->indent;
-                        $this->indent = '';
-                    } else {
-                        $this->out($this->parser->node . "\n" . $this->indent);
-                        if (!$this->parser->isEmptyTag) {
-                            $this->indent('  ');
-                        } else {
-                            $this->setLineBreaks(1);
-                        }
-                        $this->parser->html = ltrim($this->parser->html);
-                    }
-                } else {
-                    if (!$this->parser->keepWhitespace) {
-                        $this->output = rtrim($this->output);
-                    }
-                    if ($this->parser->tagName != 'pre') {
-                        $this->indent('  ');
-                        $this->out("\n" . $this->indent . $this->parser->node);
-                    } else {
-                        // reset indentation
-                        $this->out($this->parser->node);
-                        static $indent;
-                        $this->indent = $indent;
-                    }
-
-                    if (in_array($this->parent(), array('ins', 'del'))) {
-                        // ins or del was block element
-                        $this->out("\n");
-                        $this->indent('  ');
-                    }
-                    if ($this->parser->tagName == 'li') {
-                        $this->setLineBreaks(1);
-                    } else {
-                        $this->setLineBreaks(2);
-                    }
-                }
-            } else {
-                $this->out($this->parser->node);
-            }
-            if (in_array($this->parser->tagName, array('code', 'pre'))) {
-                if ($this->parser->isStartTag) {
-                    $this->buffer();
-                } else {
-                    // add stuff so cleanup just reverses this
-                    $this->out(str_replace('<', '&lt;', str_replace('>', '&gt;', $this->unbuffer())));
-                }
-            }
-        }
-    }
-
-    /**
-     * handle plain text
-     *
-     * @param void
-     * @return void
-     */
-    protected function handleText()
-    {
-        if ($this->hasParent('pre') && strpos($this->parser->node, "\n") !== false) {
-            $this->parser->node = str_replace("\n", "\n" . $this->indent, $this->parser->node);
-        }
-        if (!$this->hasParent('code') && !$this->hasParent('pre')) {
-            // entity decode
-            $this->parser->node = $this->decode($this->parser->node);
-            if (!$this->skipConversion) {
-                // escape some chars in normal Text
-                $this->parser->node = preg_replace($this->escapeInText['search'], $this->escapeInText['replace'], $this->parser->node);
-            }
-        } else {
-            $this->parser->node = str_replace(array('"', '&apos'), array('"', '\''), $this->parser->node);
-        }
-        $this->out($this->parser->node);
-        $this->lastClosedTag = '';
-    }
-
-    /**
-     * handle  and  tags
-     *
-     * @param void
-     * @return void
-     */
-    protected function handleTag_em()
-    {
-        $this->out('_', true);
-    }
-
-    protected function handleTag_i()
-    {
-        $this->handleTag_em();
-    }
-
-    /**
-     * handle  and  tags
-     *
-     * @param void
-     * @return void
-     */
-    protected function handleTag_strong()
-    {
-        $this->out('**', true);
-    }
-
-    protected function handleTag_b()
-    {
-        $this->handleTag_strong();
-    }
-
-    /**
-     * handle 

tags - * - * @param void - * @return void - */ - protected function handleTag_h1() - { - $this->handleHeader(1); - } - - /** - * handle

tags - * - * @param void - * @return void - */ - protected function handleTag_h2() - { - $this->handleHeader(2); - } - - /** - * handle

tags - * - * @param void - * @return void - */ - protected function handleTag_h3() - { - $this->handleHeader(3); - } - - /** - * handle

tags - * - * @param void - * @return void - */ - protected function handleTag_h4() - { - $this->handleHeader(4); - } - - /** - * handle

tags - * - * @param void - * @return void - */ - protected function handleTag_h5() - { - $this->handleHeader(5); - } - - /** - * handle
tags - * - * @param void - * @return void - */ - protected function handleTag_h6() - { - $this->handleHeader(6); - } - - /** - * handle header tags (

-

) - * - * @param int $level 1-6 - * @return void - */ - protected function handleHeader($level) - { - if ($this->parser->isStartTag) { - $this->out(str_repeat('#', $level) . ' ', true); - } else { - $this->setLineBreaks(2); - } - } - - /** - * handle

tags - * - * @param void - * @return void - */ - protected function handleTag_p() - { - if (!$this->parser->isStartTag) { - $this->setLineBreaks(2); - } - } - - /** - * handle tags - * - * @param void - * @return void - */ - protected function handleTag_a() - { - if ($this->parser->isStartTag) { - $this->buffer(); - $this->handleTag_a_parser(); - $this->stack(); - } else { - $tag = $this->unstack(); - $buffer = $this->unbuffer(); - $this->handleTag_a_converter($tag, $buffer); - $this->out($this->handleTag_a_converter($tag, $buffer), true); - } - } - - /** - * handle tags parsing - * - * @param void - * @return void - */ - protected function handleTag_a_parser() - { - if (isset($this->parser->tagAttributes['title'])) { - $this->parser->tagAttributes['title'] = $this->decode($this->parser->tagAttributes['title']); - } else { - $this->parser->tagAttributes['title'] = null; - } - $this->parser->tagAttributes['href'] = $this->decode(trim($this->parser->tagAttributes['href'])); - } - - /** - * handle tags conversion - * - * @param array $tag - * @param string $buffer - * @return string The markdownified link - */ - protected function handleTag_a_converter($tag, $buffer) - { - if (empty($tag['href']) && empty($tag['title'])) { - // empty links... testcase mania, who would possibly do anything like that?! - return '[' . $buffer . ']()'; - } - - if ($buffer == $tag['href'] && empty($tag['title'])) { - // - return '<' . $buffer . '>'; - } - - $bufferDecoded = $this->decode(trim($buffer)); - if (substr($tag['href'], 0, 7) == 'mailto:' && 'mailto:' . $bufferDecoded == $tag['href']) { - if (is_null($tag['title'])) { - // - return '<' . $bufferDecoded . '>'; - } - // [mail@example.com][1] - // ... - // [1]: mailto:mail@example.com Title - $tag['href'] = 'mailto:' . $bufferDecoded; - } - - if ($this->linkPosition == self::LINK_IN_PARAGRAPH) { - return '[' . $buffer . '](' . $this->getLinkReference($tag) . ')'; - } - - // [This link][id] - foreach ($this->footnotes as $tag2) { - if ($tag2['href'] == $tag['href'] && $tag2['title'] === $tag['title']) { - $tag['linkID'] = $tag2['linkID']; - break; - } - } - if (!isset($tag['linkID'])) { - $tag['linkID'] = count($this->footnotes) + 1; - array_push($this->footnotes, $tag); - } - - return '[' . $buffer . '][' . $tag['linkID'] . ']'; - } - - /** - * handle tags - * - * @param void - * @return void - */ - protected function handleTag_img() - { - if (!$this->parser->isStartTag) { - return; // just to be sure this is really an empty tag... - } - - if (isset($this->parser->tagAttributes['title'])) { - $this->parser->tagAttributes['title'] = $this->decode($this->parser->tagAttributes['title']); - } else { - $this->parser->tagAttributes['title'] = null; - } - if (isset($this->parser->tagAttributes['alt'])) { - $this->parser->tagAttributes['alt'] = $this->decode($this->parser->tagAttributes['alt']); - } else { - $this->parser->tagAttributes['alt'] = null; - } - - if (empty($this->parser->tagAttributes['src'])) { - // support for "empty" images... dunno if this is really needed - // but there are some test cases which do that... - if (!empty($this->parser->tagAttributes['title'])) { - $this->parser->tagAttributes['title'] = ' ' . $this->parser->tagAttributes['title'] . ' '; - } - $this->out('![' . $this->parser->tagAttributes['alt'] . '](' . $this->parser->tagAttributes['title'] . ')', true); - - return; - } else { - $this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']); - } - - $out = '![' . $this->parser->tagAttributes['alt'] . ']'; - if ($this->linkPosition == self::LINK_IN_PARAGRAPH) { - $out .= '(' . $this->parser->tagAttributes['src']; - if ($this->parser->tagAttributes['title']) { - $out .= ' "' . $this->parser->tagAttributes['title'] . '"'; - } - $out .= ')'; - $this->out($out, true); - return; - } - - // ![This image][id] - $link_id = false; - if (!empty($this->footnotes)) { - foreach ($this->footnotes as $tag) { - if ($tag['href'] == $this->parser->tagAttributes['src'] - && $tag['title'] === $this->parser->tagAttributes['title'] - ) { - $link_id = $tag['linkID']; - break; - } - } - } - if (!$link_id) { - $link_id = count($this->footnotes) + 1; - $tag = array( - 'href' => $this->parser->tagAttributes['src'], - 'linkID' => $link_id, - 'title' => $this->parser->tagAttributes['title'] - ); - array_push($this->footnotes, $tag); - } - $out .= '[' . $link_id . ']'; - - $this->out($out, true); - } - - /** - * handle tags - * - * @param void - * @return void - */ - protected function handleTag_code() - { - if ($this->hasParent('pre')) { - // ignore code blocks inside

-
-            return;
-        }
-        if ($this->parser->isStartTag) {
-            $this->buffer();
-        } else {
-            $buffer = $this->unbuffer();
-            // use as many backticks as needed
-            preg_match_all('#`+#', $buffer, $matches);
-            if (!empty($matches[0])) {
-                rsort($matches[0]);
-
-                $ticks = '`';
-                while (true) {
-                    if (!in_array($ticks, $matches[0])) {
-                        break;
-                    }
-                    $ticks .= '`';
-                }
-            } else {
-                $ticks = '`';
-            }
-            if ($buffer[0] == '`' || substr($buffer, -1) == '`') {
-                $buffer = ' ' . $buffer . ' ';
-            }
-            $this->out($ticks . $buffer . $ticks, true);
-        }
-    }
-
-    /**
-     * handle 
 tags
-     *
-     * @param void
-     * @return void
-     */
-    protected function handleTag_pre()
-    {
-        if ($this->keepHTML && $this->parser->isStartTag) {
-            // check if a simple  follows
-            if (!preg_match('#^\s*#Us', $this->parser->html)) {
-                // this is no standard markdown code block
-                $this->handleTagToText();
-
-                return;
-            }
-        }
-        $this->indent('    ');
-        if (!$this->parser->isStartTag) {
-            $this->setLineBreaks(2);
-        } else {
-            $this->parser->html = ltrim($this->parser->html);
-        }
-    }
-
-    /**
-     * handle 
tags - * - * @param void - * @return void - */ - protected function handleTag_blockquote() - { - $this->indent('> '); - } - - /** - * handle