diff options
author | git-marijus <mario@mariovavti.com> | 2017-05-23 21:39:47 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-23 21:39:47 +0200 |
commit | dea0d07b9af9a5927dd524a3e486317690a7e112 (patch) | |
tree | 3e990b35eb939911bb7949c2f5d633fa3d788faf /vendor/league/html-to-markdown/src/Converter | |
parent | 50e9d024581ddf57f37a6302bc089a88237657bb (diff) | |
parent | 547df2219ab4b870256f2ed90e36b97d8bf200bf (diff) | |
download | volse-hubzilla-dea0d07b9af9a5927dd524a3e486317690a7e112.tar.gz volse-hubzilla-dea0d07b9af9a5927dd524a3e486317690a7e112.tar.bz2 volse-hubzilla-dea0d07b9af9a5927dd524a3e486317690a7e112.zip |
Merge pull request #794 from dawnbreak/dev
Replace Markdownify library with html-to-markdown library.
Diffstat (limited to 'vendor/league/html-to-markdown/src/Converter')
17 files changed, 838 insertions, 0 deletions
diff --git a/vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php b/vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php new file mode 100644 index 000000000..eb2d09d17 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php @@ -0,0 +1,44 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class BlockquoteConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + // Contents should have already been converted to Markdown by this point, + // so we just need to add '>' symbols to each line. + + $markdown = ''; + + $quote_content = trim($element->getValue()); + + $lines = preg_split('/\r\n|\r|\n/', $quote_content); + + $total_lines = count($lines); + + foreach ($lines as $i => $line) { + $markdown .= '> ' . $line . "\n"; + if ($i + 1 === $total_lines) { + $markdown .= "\n"; + } + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('blockquote'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/CodeConverter.php b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php new file mode 100644 index 000000000..c8ec2c005 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php @@ -0,0 +1,62 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class CodeConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $language = null; + + // Checking for language class on the code block + $classes = $element->getAttribute('class'); + + if ($classes) { + // Since tags can have more than one class, we need to find the one that starts with 'language-' + $classes = explode(' ', 
$classes); + foreach ($classes as $class) { + if (strpos($class, 'language-') !== false) { + // Found one, save it as the selected language and stop looping over the classes. + // The space after the language avoids gluing the actual code with the language tag + $language = str_replace('language-', '', $class) . ' '; + break; + } + } + } + + $markdown = ''; + $code = html_entity_decode($element->getChildrenAsString()); + + // In order to remove the code tags we need to search for them and, in the case of the opening tag + // use a regular expression to find the tag and the other attributes it might have + $code = preg_replace('/<code\b[^>]*>/', '', $code); + $code = str_replace('</code>', '', $code); + + // Checking if the code has multiple lines + $lines = preg_split('/\r\n|\r|\n/', $code); + if (count($lines) > 1) { + // Multiple lines detected, adding three backticks and newlines + $markdown .= '```' . $language . "\n" . $code . "\n" . '```'; + } else { + // One line of code, wrapping it on one backtick. + $markdown .= '`' . $language . $code . 
'`'; + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('code'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/CommentConverter.php b/vendor/league/html-to-markdown/src/Converter/CommentConverter.php new file mode 100644 index 000000000..55038b254 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/CommentConverter.php @@ -0,0 +1,26 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class CommentConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + return ''; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('#comment'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ConverterInterface.php b/vendor/league/html-to-markdown/src/Converter/ConverterInterface.php new file mode 100644 index 000000000..8530559a0 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ConverterInterface.php @@ -0,0 +1,20 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +interface ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element); + + /** + * @return string[] + */ + public function getSupportedTags(); +} diff --git a/vendor/league/html-to-markdown/src/Converter/DefaultConverter.php b/vendor/league/html-to-markdown/src/Converter/DefaultConverter.php new file mode 100644 index 000000000..964a71093 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/DefaultConverter.php @@ -0,0 +1,50 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\Configuration; +use League\HTMLToMarkdown\ConfigurationAwareInterface; +use League\HTMLToMarkdown\ElementInterface; + +class 
DefaultConverter implements ConverterInterface, ConfigurationAwareInterface +{ + const DEFAULT_CONVERTER = '_default'; + + /** + * @var Configuration + */ + protected $config; + + /** + * @param Configuration $config + */ + public function setConfig(Configuration $config) + { + $this->config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + // If strip_tags is false (the default), preserve tags that don't have Markdown equivalents, + // such as <span> nodes on their own. C14N() canonicalizes the node to a string. + // See: http://www.php.net/manual/en/domnode.c14n.php + if ($this->config->getOption('strip_tags', false)) { + return $element->getValue(); + } + + return html_entity_decode($element->getChildrenAsString()); + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array(self::DEFAULT_CONVERTER); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/DivConverter.php b/vendor/league/html-to-markdown/src/Converter/DivConverter.php new file mode 100644 index 000000000..656a0ba4d --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/DivConverter.php @@ -0,0 +1,45 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\Configuration; +use League\HTMLToMarkdown\ConfigurationAwareInterface; +use League\HTMLToMarkdown\ElementInterface; + +class DivConverter implements ConverterInterface, ConfigurationAwareInterface +{ + /** + * @var Configuration + */ + protected $config; + + /** + * @param Configuration $config + */ + public function setConfig(Configuration $config) + { + $this->config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + if ($this->config->getOption('strip_tags', false)) { + return $element->getValue() . 
"\n\n"; + } + + return html_entity_decode($element->getChildrenAsString()); + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('div'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php b/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php new file mode 100644 index 000000000..67250769b --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php @@ -0,0 +1,57 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\Configuration; +use League\HTMLToMarkdown\ConfigurationAwareInterface; +use League\HTMLToMarkdown\ElementInterface; + +class EmphasisConverter implements ConverterInterface, ConfigurationAwareInterface +{ + /** + * @var Configuration + */ + protected $config; + + /** + * @param Configuration $config + */ + public function setConfig(Configuration $config) + { + $this->config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $tag = $element->getTagName(); + $value = $element->getValue(); + + if (!trim($value)) { + return ''; + } + + if ($tag === 'i' || $tag === 'em') { + $style = $this->config->getOption('italic_style'); + } else { + $style = $this->config->getOption('bold_style'); + } + + $prefix = ltrim($value) !== $value ? ' ' : ''; + $suffix = rtrim($value) !== $value ? ' ' : ''; + + return $prefix . $style . trim($value) . $style . 
$suffix; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('em', 'i', 'strong', 'b'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php b/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php new file mode 100644 index 000000000..37cd44e73 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php @@ -0,0 +1,41 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\Configuration; +use League\HTMLToMarkdown\ConfigurationAwareInterface; +use League\HTMLToMarkdown\ElementInterface; + +class HardBreakConverter implements ConverterInterface, ConfigurationAwareInterface +{ + /** + * @var Configuration + */ + protected $config; + + /** + * @param Configuration $config + */ + public function setConfig(Configuration $config) + { + $this->config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + return $this->config->getOption('hard_break') ? 
"\n" : " \n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('br'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php new file mode 100644 index 000000000..d117e7d36 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php @@ -0,0 +1,78 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\Configuration; +use League\HTMLToMarkdown\ConfigurationAwareInterface; +use League\HTMLToMarkdown\ElementInterface; + +class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface +{ + const STYLE_ATX = 'atx'; + const STYLE_SETEXT = 'setext'; + + /** + * @var Configuration + */ + protected $config; + + /** + * @param Configuration $config + */ + public function setConfig(Configuration $config) + { + $this->config = $config; + } + + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $level = (int) substr($element->getTagName(), 1, 1); + $style = $this->config->getOption('header_style', self::STYLE_SETEXT); + + if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) { + return $this->createSetextHeader($level, $element->getValue()); + } + + return $this->createAtxHeader($level, $element->getValue()); + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + } + + /** + * @param int $level + * @param string $content + * + * @return string + */ + private function createSetextHeader($level, $content) + { + $length = function_exists('mb_strlen') ? mb_strlen($content, 'utf-8') : strlen($content); + $underline = ($level === 1) ? '=' : '-'; + + return $content . "\n" . str_repeat($underline, $length) . 
"\n\n"; + } + + /** + * @param int $level + * @param string $content + * + * @return string + */ + private function createAtxHeader($level, $content) + { + $prefix = str_repeat('#', $level) . ' '; + + return $prefix . $content . "\n\n"; + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php b/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php new file mode 100644 index 000000000..8f54f9397 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php @@ -0,0 +1,26 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class HorizontalRuleConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + return "- - - - - -\n\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('hr'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ImageConverter.php b/vendor/league/html-to-markdown/src/Converter/ImageConverter.php new file mode 100644 index 000000000..657c769c2 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ImageConverter.php @@ -0,0 +1,35 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class ImageConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $src = $element->getAttribute('src'); + $alt = $element->getAttribute('alt'); + $title = $element->getAttribute('title'); + + if ($title !== '') { + // No newlines added. <img> should be in a block-level element. + return '![' . $alt . '](' . $src . ' "' . $title . '")'; + } + + return '![' . $alt . '](' . $src . 
')'; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('img'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/LinkConverter.php b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php new file mode 100644 index 000000000..f0765f38b --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php @@ -0,0 +1,52 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class LinkConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $href = $element->getAttribute('href'); + $title = $element->getAttribute('title'); + $text = trim($element->getValue()); + + if ($title !== '') { + $markdown = '[' . $text . '](' . $href . ' "' . $title . '")'; + } elseif ($href === $text && $this->isValidAutolink($href)) { + $markdown = '<' . $href . '>'; + } else { + $markdown = '[' . $text . '](' . $href . 
')'; + } + + if (!$href) { + $markdown = html_entity_decode($element->getChildrenAsString()); + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('a'); + } + + /** + * @param string $href + * + * @return bool + */ + private function isValidAutolink($href) + { + return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1; + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php b/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php new file mode 100644 index 000000000..07a4c85a9 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php @@ -0,0 +1,26 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class ListBlockConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + return $element->getValue() . 
"\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('ol', 'ul'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php b/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php new file mode 100644 index 000000000..dafec077c --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ListItemConverter.php @@ -0,0 +1,47 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class ListItemConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + // If parent is an ol, use numbers, otherwise, use dashes + $list_type = $element->getParent()->getTagName(); + + // Add spaces to start for nested list items + $level = $element->getListItemLevel($element); + + $prefixForParagraph = str_repeat(' ', $level + 1); + $value = trim(implode("\n" . $prefixForParagraph, explode("\n", trim($element->getValue())))); + + // If list item is the first in a nested list, add a newline before it + $prefix = ''; + if ($level > 0 && $element->getSiblingPosition() === 1) { + $prefix = "\n"; + } + + if ($list_type === 'ul') { + return $prefix . '- ' . $value . "\n"; + } + + $number = $element->getSiblingPosition(); + + return $prefix . $number . '. ' . $value . 
"\n"; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('li'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php b/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php new file mode 100644 index 000000000..cf852bfcf --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php @@ -0,0 +1,124 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class ParagraphConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $value = $element->getValue(); + + $markdown = ''; + + $lines = preg_split('/\r\n|\r|\n/', $value); + foreach ($lines as $line) { + /* + * Some special characters need to be escaped based on the position that they appear + * The following function will deal with those special cases. + */ + $markdown .= $this->escapeSpecialCharacters($line); + $markdown .= "\n"; + } + + return trim($markdown) !== '' ? rtrim($markdown) . "\n\n" : ''; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('p'); + } + + /** + * @param string $line + * + * @return string + */ + private function escapeSpecialCharacters($line) + { + $line = $this->escapeFirstCharacters($line); + $line = $this->escapeOtherCharacters($line); + $line = $this->escapeOtherCharactersRegex($line); + + return $line; + } + + /** + * @param string $line + * + * @return string + */ + private function escapeFirstCharacters($line) + { + $escapable = array( + '>', + '- ', + '+ ', + '--', + '~~~', + '---', + '- - -' + ); + + foreach ($escapable as $i) { + if (strpos(ltrim($line), $i) === 0) { + // Found a character that must be escaped, adding a backslash before + return '\\' . 
ltrim($line); + } + } + + return $line; + } + + /** + * @param string $line + * + * @return string + */ + private function escapeOtherCharacters($line) + { + $escapable = array( + '<!--' + ); + + foreach ($escapable as $i) { + if (strpos($line, $i) !== false) { + // Found an escapable character, escaping it + $line = substr_replace($line, '\\', strpos($line, $i), 0); + } + } + + return $line; + } + + /** + * @param string $line + * + * @return string + */ + private function escapeOtherCharactersRegex($line) + { + $regExs = array( + // Match numbers ending on ')' or '.' that are at the beginning of the line. + '/^[0-9]+(?=\)|\.)/' + ); + + foreach ($regExs as $i) { + if (preg_match($i, $line, $match)) { + // Matched an escapable character, adding a backslash on the string before the offending character + $line = substr_replace($line, '\\', strlen($match[0]), 0); + } + } + + return $line; + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php b/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php new file mode 100644 index 000000000..7a4ec3357 --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php @@ -0,0 +1,59 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class PreformattedConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $markdown = ''; + + $pre_content = html_entity_decode($element->getChildrenAsString()); + $pre_content = str_replace(array('<pre>', '</pre>'), '', $pre_content); + + /* + * Checking for the code tag. + * Usually pre tags are used along with code tags. This conditional will check for already converted code tags, + * which use backticks, and if those backticks are at the beginning and at the end of the string it means + * there's no more information to convert. 
+ */ + + $firstBacktick = strpos(trim($pre_content), '`'); + $lastBacktick = strrpos(trim($pre_content), '`'); + if ($firstBacktick === 0 && $lastBacktick === strlen(trim($pre_content)) - 1) { + return $pre_content; + } + + // If the execution reaches this point it means it's just a pre tag, with no code tag nested + + // Normalizing new lines + $pre_content = preg_replace('/\r\n|\r|\n/', PHP_EOL, $pre_content); + + // Checking if the string has multiple lines + $lines = preg_split('/\r\n|\r|\n/', $pre_content); + if (count($lines) > 1) { + // Multiple lines detected, adding three backticks and newlines + $markdown .= '```' . "\n" . $pre_content . "\n" . '```'; + } else { + // One line of code, wrapping it on one backtick. + $markdown .= '`' . $pre_content . '`'; + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('pre'); + } +} diff --git a/vendor/league/html-to-markdown/src/Converter/TextConverter.php b/vendor/league/html-to-markdown/src/Converter/TextConverter.php new file mode 100644 index 000000000..d6d91e16f --- /dev/null +++ b/vendor/league/html-to-markdown/src/Converter/TextConverter.php @@ -0,0 +1,46 @@ +<?php + +namespace League\HTMLToMarkdown\Converter; + +use League\HTMLToMarkdown\ElementInterface; + +class TextConverter implements ConverterInterface +{ + /** + * @param ElementInterface $element + * + * @return string + */ + public function convert(ElementInterface $element) + { + $markdown = $element->getValue(); + + // Remove leftover \n at the beginning of the line + $markdown = ltrim($markdown, "\n"); + + // Replace sequences of invisible characters with spaces + $markdown = preg_replace('~\s+~u', ' ', $markdown); + + // Escape the following characters: '*', '_', '[', ']' and '\' + $markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown); + + $markdown = preg_replace('~^#~u', '\\\\#', $markdown); + + if ($markdown === ' ') { + $next = $element->getNext(); + if (!$next || 
$next->isBlock()) { + $markdown = ''; + } + } + + return $markdown; + } + + /** + * @return string[] + */ + public function getSupportedTags() + { + return array('#text'); + } +} |