diff options
Diffstat (limited to 'vendor/league/html-to-markdown')
10 files changed, 119 insertions, 30 deletions
diff --git a/vendor/league/html-to-markdown/CHANGELOG.md b/vendor/league/html-to-markdown/CHANGELOG.md index 981ffd594..ab07c94f5 100644 --- a/vendor/league/html-to-markdown/CHANGELOG.md +++ b/vendor/league/html-to-markdown/CHANGELOG.md @@ -4,6 +4,24 @@ Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) princip ## [Unreleased][unreleased] +## [4.8.0] - 2018-09-18 +### Added + - Added support for email auto-linking + - Added a new interface (`HtmlConverterInterface`) for the main `HtmlConverter` class + - Added additional test cases (#14) + +### Changed + - The `italic_style` option now defaults to `'*'` so that in-word emphasis is handled properly (#75) + +### Fixed + - Fixed several issues of `<code>` and `<pre>` tags not converting to blocks or inlines properly (#26, #70, #102, #140, #161, #162) + - Fixed in-word emphasis using underscores as delimiter (#75) + - Fixed character escaping inside of `<div>` elements + - Fixed header edge cases + +### Deprecated + - The `bold_style` and `italic_style` options have been deprecated (#75) + ## [4.7.0] - 2018-05-19 ### Added - Added `setOptions()` function for chainable calling (#149) @@ -217,7 +235,8 @@ not ideally set, so this releases fixes that. Moving forwards this should reduce ### Added - Initial release -[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.7.0...master +[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.8.0...master +[4.8.0]: https://github.com/thephpleague/html-to-markdown/compare/4.7.0...4.8.0 [4.7.0]: https://github.com/thephpleague/html-to-markdown/compare/4.6.2...4.7.0 [4.6.2]: https://github.com/thephpleague/html-to-markdown/compare/4.6.1...4.6.2 [4.6.1]: https://github.com/thephpleague/html-to-markdown/compare/4.6.0...4.6.1 diff --git a/vendor/league/html-to-markdown/composer.json b/vendor/league/html-to-markdown/composer.json index c79230562..53403916e 100644 --- a/vendor/league/html-to-markdown/composer.json +++ b/vendor/league/html-to-markdown/composer.json @@ -9,7 +9,7 @@ { "name": "Colin O'Dell", "email": "colinodell@gmail.com", - "homepage": "http://www.colinodell.com", + "homepage": "https://www.colinodell.com", "role": "Lead Developer" }, { @@ -42,7 +42,7 @@ "bin": ["bin/html-to-markdown"], "extra": { "branch-alias": { - "dev-master": "4.8-dev" + "dev-master": "4.9-dev" } } } diff --git a/vendor/league/html-to-markdown/src/Configuration.php b/vendor/league/html-to-markdown/src/Configuration.php index 2943383aa..5bc8d5503 100644 --- a/vendor/league/html-to-markdown/src/Configuration.php +++ b/vendor/league/html-to-markdown/src/Configuration.php @@ -12,6 +12,8 @@ class Configuration public function __construct(array $config = array()) { $this->config = $config; + + $this->checkForDeprecatedOptions($config); } /** @@ -19,6 +21,7 @@ class Configuration */ public function merge(array $config = array()) { + $this->checkForDeprecatedOptions($config); $this->config = array_replace_recursive($this->config, $config); } @@ -27,6 +30,7 @@ class Configuration */ public function replace(array $config = array()) { + $this->checkForDeprecatedOptions($config); $this->config = $config; } @@ -36,6 +40,7 @@ class Configuration */ public function setOption($key, $value) { + $this->checkForDeprecatedOptions(array($key => $value)); $this->config[$key] = $value; } @@ -57,4 +62,15 @@ class Configuration return $this->config[$key]; } + + private function checkForDeprecatedOptions(array $config) + { + foreach ($config as $key => $value) { + if ($key === 'bold_style' && $value !== '**') { + @trigger_error('Customizing the bold_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED); + } elseif ($key === 'italic_style' && $value !== '*') { + @trigger_error('Customizing the italic_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED); + } + } + } } diff --git a/vendor/league/html-to-markdown/src/Converter/CodeConverter.php b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php index e536362ee..39e6a7bc4 100644 --- a/vendor/league/html-to-markdown/src/Converter/CodeConverter.php +++ b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php @@ -13,7 +13,7 @@ class CodeConverter implements ConverterInterface */ public function convert(ElementInterface $element) { - $language = null; + $language = ''; // Checking for language class on the code block $classes = $element->getAttribute('class'); @@ -24,8 +24,7 @@ class CodeConverter implements ConverterInterface foreach ($classes as $class) { if (strpos($class, 'language-') !== false) { // Found one, save it as the selected language and stop looping over the classes. - // The space after the language avoids gluing the actual code with the language tag - $language = str_replace('language-', '', $class) . ' '; + $language = str_replace('language-', '', $class); break; } } @@ -39,14 +38,13 @@ class CodeConverter implements ConverterInterface $code = preg_replace('/<code\b[^>]*>/', '', $code); $code = str_replace('</code>', '', $code); - // Checking if the code has multiple lines - $lines = preg_split('/\r\n|\r|\n/', $code); - if (count($lines) > 1) { - // Multiple lines detected, adding three backticks and newlines - $markdown .= '```' . $language . "\n" . $code . "\n" . '```' . "\n\n"; + // Checking if it's a code block or span + if ($this->shouldBeBlock($element, $code)) { + // Code block detected, newlines will be added in parent + $markdown .= '```' . $language . "\n" . $code . "\n" . '```'; } else { - // One line of code, wrapping it on one backtick. - $markdown .= '`' . $language . $code . '`'; + // One line of code, wrapping it on one backtick, removing new lines + $markdown .= '`' . preg_replace('/\r\n|\r|\n/', '', $code) . '`'; } return $markdown; @@ -59,4 +57,23 @@ class CodeConverter implements ConverterInterface { return array('code'); } + + /** + * @param ElementInterface $element + * @param string $code + * + * @return bool + */ + private function shouldBeBlock(ElementInterface $element, $code) + { + if ($element->getParent()->getTagName() == 'pre') { + return true; + } + + if (preg_match('/[^\s]` `/', $code)) { + return true; + } + + return false; + } } diff --git a/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php index d117e7d36..05d4fe81e 100644 --- a/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php +++ b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php @@ -34,6 +34,10 @@ class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface $level = (int) substr($element->getTagName(), 1, 1); $style = $this->config->getOption('header_style', self::STYLE_SETEXT); + if (strlen($element->getValue()) === 0) { + return ''; + } + if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) { return $this->createSetextHeader($level, $element->getValue()); } diff --git a/vendor/league/html-to-markdown/src/Converter/LinkConverter.php b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php index 74b49a778..c82b70e97 100644 --- a/vendor/league/html-to-markdown/src/Converter/LinkConverter.php +++ b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php @@ -21,6 +21,8 @@ class LinkConverter implements ConverterInterface $markdown = '[' . $text . '](' . $href . ' "' . $title . '")'; } elseif ($href === $text && $this->isValidAutolink($href)) { $markdown = '<' . $href . '>'; + } elseif ($href === 'mailto:' . $text && $this->isValidEmail($text)) { + $markdown = '<' . $text . '>'; } else { $markdown = '[' . $text . '](' . $href . ')'; } @@ -49,4 +51,15 @@ class LinkConverter implements ConverterInterface { return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1; } + + /** + * @param string $email + * + * @return bool + */ + private function isValidEmail($email) + { + // Email validation is messy business, but this should cover most cases + return filter_var($email, FILTER_VALIDATE_EMAIL); + } } diff --git a/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php b/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php index 3b77ba10b..321c898b1 100644 --- a/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php +++ b/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php @@ -13,8 +13,6 @@ class PreformattedConverter implements ConverterInterface */ public function convert(ElementInterface $element) { - $markdown = ''; - $pre_content = html_entity_decode($element->getChildrenAsString()); $pre_content = str_replace(array('<pre>', '</pre>'), '', $pre_content); @@ -28,28 +26,22 @@ class PreformattedConverter implements ConverterInterface $firstBacktick = strpos(trim($pre_content), '`'); $lastBacktick = strrpos(trim($pre_content), '`'); if ($firstBacktick === 0 && $lastBacktick === strlen(trim($pre_content)) - 1) { - return $pre_content; + return $pre_content . "\n\n"; } // If the execution reaches this point it means it's just a pre tag, with no code tag nested // Empty lines are a special case if ($pre_content === '') { - return "```\n```\n"; + return "```\n```\n\n"; } // Normalizing new lines - $pre_content = preg_replace('/\r\n|\r|\n/', PHP_EOL, $pre_content); - - // Is it a single line? - if (strpos($pre_content, PHP_EOL) === false) { - // One line of code, wrapping it on one backtick. - return '`' . $pre_content . '`'; - } + $pre_content = preg_replace('/\r\n|\r|\n/', "\n", $pre_content); // Ensure there's a newline at the end - if (strrpos($pre_content, PHP_EOL) !== strlen($pre_content) - 1) { - $pre_content .= PHP_EOL; + if (strrpos($pre_content, "\n") !== strlen($pre_content) - strlen("\n")) { + $pre_content .= "\n"; } // Use three backticks diff --git a/vendor/league/html-to-markdown/src/Converter/TextConverter.php b/vendor/league/html-to-markdown/src/Converter/TextConverter.php index d6d91e16f..fcd466094 100644 --- a/vendor/league/html-to-markdown/src/Converter/TextConverter.php +++ b/vendor/league/html-to-markdown/src/Converter/TextConverter.php @@ -22,7 +22,9 @@ class TextConverter implements ConverterInterface $markdown = preg_replace('~\s+~u', ' ', $markdown); // Escape the following characters: '*', '_', '[', ']' and '\' - $markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown); + if ($element->getParent() && $element->getParent()->getTagName() !== 'div') { + $markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown); + } $markdown = preg_replace('~^#~u', '\\\\#', $markdown); diff --git a/vendor/league/html-to-markdown/src/HtmlConverter.php b/vendor/league/html-to-markdown/src/HtmlConverter.php index 155369f56..3381e1e1e 100644 --- a/vendor/league/html-to-markdown/src/HtmlConverter.php +++ b/vendor/league/html-to-markdown/src/HtmlConverter.php @@ -14,7 +14,7 @@ namespace League\HTMLToMarkdown; * * @license http://www.opensource.org/licenses/mit-license.php MIT */ -class HtmlConverter +class HtmlConverter implements HtmlConverterInterface { /** * @var Environment @@ -35,8 +35,8 @@ class HtmlConverter 'header_style' => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2 'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML 'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output. - 'bold_style' => '**', // Set to '__' if you prefer the underlined style - 'italic_style' => '_', // Set to '*' if you prefer the asterisk style + 'bold_style' => '**', // DEPRECATED: Set to '__' if you prefer the underlined style + 'italic_style' => '*', // DEPRECATED: Set to '_' if you prefer the underlined style 'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script' 'hard_break' => false, // Set to true to turn <br> into `\n` instead of ` \n` 'list_item_style' => '-', // Set the default character for each <li> in a <ul>. Can be '-', '*', or '+' diff --git a/vendor/league/html-to-markdown/src/HtmlConverterInterface.php b/vendor/league/html-to-markdown/src/HtmlConverterInterface.php new file mode 100644 index 000000000..7d43cf87e --- /dev/null +++ b/vendor/league/html-to-markdown/src/HtmlConverterInterface.php @@ -0,0 +1,26 @@ +<?php + +namespace League\HTMLToMarkdown; + +/** + * Interface for an HTML-to-Markdown converter. + * + * @author Colin O'Dell <colinodell@gmail.com> + * + * @link https://github.com/thephpleague/html-to-markdown/ Latest version on GitHub. + * + * @license http://www.opensource.org/licenses/mit-license.php MIT + */ +interface HtmlConverterInterface +{ + /** + * Convert the given $html to Markdown + * + * @param string $html + * + * @throws \InvalidArgumentException + * + * @return string The Markdown version of the html + */ + public function convert($html); +} |