From 547df2219ab4b870256f2ed90e36b97d8bf200bf Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Tue, 23 May 2017 00:32:11 +0200 Subject: Replace Mardownify library with html-to-markdown library. --- .../src/Converter/BlockquoteConverter.php | 44 ++++++++ .../src/Converter/CodeConverter.php | 62 +++++++++++ .../src/Converter/CommentConverter.php | 26 +++++ .../src/Converter/ConverterInterface.php | 20 ++++ .../src/Converter/DefaultConverter.php | 50 +++++++++ .../src/Converter/DivConverter.php | 45 ++++++++ .../src/Converter/EmphasisConverter.php | 57 ++++++++++ .../src/Converter/HardBreakConverter.php | 41 +++++++ .../src/Converter/HeaderConverter.php | 78 +++++++++++++ .../src/Converter/HorizontalRuleConverter.php | 26 +++++ .../src/Converter/ImageConverter.php | 35 ++++++ .../src/Converter/LinkConverter.php | 52 +++++++++ .../src/Converter/ListBlockConverter.php | 26 +++++ .../src/Converter/ListItemConverter.php | 47 ++++++++ .../src/Converter/ParagraphConverter.php | 124 +++++++++++++++++++++ .../src/Converter/PreformattedConverter.php | 59 ++++++++++ .../src/Converter/TextConverter.php | 46 ++++++++ 17 files changed, 838 insertions(+) create mode 100644 vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/CodeConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/CommentConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/ConverterInterface.php create mode 100644 vendor/league/html-to-markdown/src/Converter/DefaultConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/DivConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/HeaderConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/ImageConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/LinkConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/ListItemConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php create mode 100644 vendor/league/html-to-markdown/src/Converter/TextConverter.php (limited to 'vendor/league/html-to-markdown/src/Converter') diff --git a/vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php b/vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php new file mode 100644 index 000000000..eb2d09d17 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php @@ -0,0 +1,44 @@ +' symbols to each line. + + $markdown = ''; + + $quote_content = trim($element->getValue()); + + $lines = preg_split('/\r\n|\r|\n/', $quote_content); + + $total_lines = count($lines); + + foreach ($lines as $i => $line) { + $markdown .= '> ' . $line . "\n"; + if ($i + 1 === $total_lines) { + $markdown .= "\n"; + } + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('blockquote'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/CodeConverter.php b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php new file mode 100644 index 000000000..c8ec2c005 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php @@ -0,0 +1,62 @@ +getAttribute('class'); + + if ($classes) { + // Since tags can have more than one class, we need to find the one that starts with 'language-' + $classes = explode(' ', $classes); + foreach ($classes as $class) { + if (strpos($class, 'language-') !== false) { + // Found one, save it as the selected language and stop looping over the classes. + // The space after the language avoids gluing the actual code with the language tag + $language = str_replace('language-', '', $class) . ' '; + break; + } + } + } + + $markdown = ''; + $code = html_entity_decode($element->getChildrenAsString()); + + // In order to remove the code tags we need to search for them and, in the case of the opening tag + // use a regular expression to find the tag and the other attributes it might have + $code = preg_replace('/]*>/', '', $code); + $code = str_replace('', '', $code); + + // Checking if the code has multiple lines + $lines = preg_split('/\r\n|\r|\n/', $code); + if (count($lines) > 1) { + // Multiple lines detected, adding three backticks and newlines + $markdown .= '```' . $language . "\n" . $code . "\n" . '```'; + } else { + // One line of code, wrapping it on one backtick. + $markdown .= '`' . $language . $code . '`'; + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('code'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/CommentConverter.php b/vendor/league/html-to-markdown/src/Converter/CommentConverter.php new file mode 100644 index 000000000..55038b254 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/CommentConverter.php @@ -0,0 +1,26 @@ +config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + // If strip_tags is false (the default), preserve tags that don't have Markdown equivalents, + // such as nodes on their own. C14N() canonicalizes the node to a string. + // See: http://www.php.net/manual/en/domnode.c14n.php + if ($this->config->getOption('strip_tags', false)) { + return $element->getValue(); + } + + return html_entity_decode($element->getChildrenAsString()); + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array(self::DEFAULT_CONVERTER); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/DivConverter.php b/vendor/league/html-to-markdown/src/Converter/DivConverter.php new file mode 100644 index 000000000..656a0ba4d --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/DivConverter.php @@ -0,0 +1,45 @@ +config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + if ($this->config->getOption('strip_tags', false)) { + return $element->getValue() . "\n\n"; + } + + return html_entity_decode($element->getChildrenAsString()); + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('div'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php b/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php new file mode 100644 index 000000000..67250769b --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php @@ -0,0 +1,57 @@ +config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $tag = $element->getTagName(); + $value = $element->getValue(); + + if (!trim($value)) { + return ''; + } + + if ($tag === 'i' || $tag === 'em') { + $style = $this->config->getOption('italic_style'); + } else { + $style = $this->config->getOption('bold_style'); + } + + $prefix = ltrim($value) !== $value ? ' ' : ''; + $suffix = rtrim($value) !== $value ? ' ' : ''; + + return $prefix . $style . trim($value) . $style . $suffix; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('em', 'i', 'strong', 'b'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php b/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php new file mode 100644 index 000000000..37cd44e73 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php @@ -0,0 +1,41 @@ +config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + return $this->config->getOption('hard_break') ? "\n" : " \n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('br'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php new file mode 100644 index 000000000..d117e7d36 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php @@ -0,0 +1,78 @@ +config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $level = (int) substr($element->getTagName(), 1, 1); + $style = $this->config->getOption('header_style', self::STYLE_SETEXT); + + if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) { + return $this->createSetextHeader($level, $element->getValue()); + } + + return $this->createAtxHeader($level, $element->getValue()); + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + } + + /** + * @param int $level + * @param string $content + * + * @return string + */ + private function createSetextHeader($level, $content) + { + $length = function_exists('mb_strlen') ? mb_strlen($content, 'utf-8') : strlen($content); + $underline = ($level === 1) ? '=' : '-'; + + return $content . "\n" . str_repeat($underline, $length) . "\n\n"; + } + + /** + * @param int $level + * @param string $content + * + * @return string + */ + private function createAtxHeader($level, $content) + { + $prefix = str_repeat('#', $level) . ' '; + + return $prefix . $content . "\n\n"; + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php b/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php new file mode 100644 index 000000000..8f54f9397 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php @@ -0,0 +1,26 @@ +getAttribute('src'); + $alt = $element->getAttribute('alt'); + $title = $element->getAttribute('title'); + + if ($title !== '') { + // No newlines added. should be in a block-level element. + return '![' . $alt . '](' . $src . ' "' . $title . '")'; + } + + return '![' . $alt . '](' . $src . ')'; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('img'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/LinkConverter.php b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php new file mode 100644 index 000000000..f0765f38b --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php @@ -0,0 +1,52 @@ +getAttribute('href'); + $title = $element->getAttribute('title'); + $text = trim($element->getValue()); + + if ($title !== '') { + $markdown = '[' . $text . '](' . $href . ' "' . $title . '")'; + } elseif ($href === $text && $this->isValidAutolink($href)) { + $markdown = '<' . $href . '>'; + } else { + $markdown = '[' . $text . '](' . $href . ')'; + } + + if (!$href) { + $markdown = html_entity_decode($element->getChildrenAsString()); + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('a'); + } + + /** + * @param string $href + * + * @return bool + */ + private function isValidAutolink($href) + { + return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1; + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php b/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php new file mode 100644 index 000000000..07a4c85a9 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php @@ -0,0 +1,26 @@ +getValue() . "\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('ol', 'ul'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php b/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php new file mode 100644 index 000000000..dafec077c --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php @@ -0,0 +1,47 @@ +getParent()->getTagName(); + + // Add spaces to start for nested list items + $level = $element->getListItemLevel($element); + + $prefixForParagraph = str_repeat(' ', $level + 1); + $value = trim(implode("\n" . $prefixForParagraph, explode("\n", trim($element->getValue())))); + + // If list item is the first in a nested list, add a newline before it + $prefix = ''; + if ($level > 0 && $element->getSiblingPosition() === 1) { + $prefix = "\n"; + } + + if ($list_type === 'ul') { + return $prefix . '- ' . $value . "\n"; + } + + $number = $element->getSiblingPosition(); + + return $prefix . $number . '. ' . $value . "\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('li'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php b/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php new file mode 100644 index 000000000..cf852bfcf --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php @@ -0,0 +1,124 @@ +getValue(); + + $markdown = ''; + + $lines = preg_split('/\r\n|\r|\n/', $value); + foreach ($lines as $line) { + /* + * Some special characters need to be escaped based on the position that they appear + * The following function will deal with those special cases. + */ + $markdown .= $this->escapeSpecialCharacters($line); + $markdown .= "\n"; + } + + return trim($markdown) !== '' ? rtrim($markdown) . "\n\n" : ''; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('p'); + } + + /** + * @param string $line + * + * @return string + */ + private function escapeSpecialCharacters($line) + { + $line = $this->escapeFirstCharacters($line); + $line = $this->escapeOtherCharacters($line); + $line = $this->escapeOtherCharactersRegex($line); + + return $line; + } + + /** + * @param string $line + * + * @return string + */ + private function escapeFirstCharacters($line) + { + $escapable = array( + '>', + '- ', + '+ ', + '--', + '~~~', + '---', + '- - -' + ); + + foreach ($escapable as $i) { + if (strpos(ltrim($line), $i) === 0) { + // Found a character that must be escaped, adding a backslash before + return '\\' . ltrim($line); + } + } + + return $line; + } + + /** + * @param string $line + * + * @return string + */ + private function escapeOtherCharacters($line) + { + $escapable = array( + '