aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/league/html-to-markdown
diff options
context:
space:
mode:
author zotlabs <mike@macgirvin.com> 2018-10-30 18:11:49 -0700
committer zotlabs <mike@macgirvin.com> 2018-10-30 18:11:49 -0700
commit 70c55da1df69d90dcbeb5a78c994b23a8456bfc9 (patch)
tree 86d39069b02ecabe4ed091514e5076a98abda43a /vendor/league/html-to-markdown
parent 623aa7ea48149ca7c2bc556931f25befdf49e58a (diff)
parent f9ab7647dd660adb37464614616cb8484c500de4 (diff)
download volse-hubzilla-70c55da1df69d90dcbeb5a78c994b23a8456bfc9.tar.gz
volse-hubzilla-70c55da1df69d90dcbeb5a78c994b23a8456bfc9.tar.bz2
volse-hubzilla-70c55da1df69d90dcbeb5a78c994b23a8456bfc9.zip
Merge branch 'dev' of https://framagit.org/hubzilla/core into xdev_merge
Diffstat (limited to 'vendor/league/html-to-markdown')
-rw-r--r-- vendor/league/html-to-markdown/CHANGELOG.md 21
-rw-r--r-- vendor/league/html-to-markdown/composer.json 4
-rw-r--r-- vendor/league/html-to-markdown/src/Configuration.php 16
-rw-r--r-- vendor/league/html-to-markdown/src/Converter/CodeConverter.php 37
-rw-r--r-- vendor/league/html-to-markdown/src/Converter/HeaderConverter.php 4
-rw-r--r-- vendor/league/html-to-markdown/src/Converter/LinkConverter.php 13
-rw-r--r-- vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php 18
-rw-r--r-- vendor/league/html-to-markdown/src/Converter/TextConverter.php 4
-rw-r--r-- vendor/league/html-to-markdown/src/HtmlConverter.php 6
-rw-r--r-- vendor/league/html-to-markdown/src/HtmlConverterInterface.php 26
10 files changed, 119 insertions, 30 deletions
diff --git a/vendor/league/html-to-markdown/CHANGELOG.md b/vendor/league/html-to-markdown/CHANGELOG.md
index 981ffd594..ab07c94f5 100644
--- a/vendor/league/html-to-markdown/CHANGELOG.md
+++ b/vendor/league/html-to-markdown/CHANGELOG.md
@@ -4,6 +4,24 @@ Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) princip
## [Unreleased][unreleased]
+## [4.8.0] - 2018-09-18
+### Added
+ - Added support for email auto-linking
+ - Added a new interface (`HtmlConverterInterface`) for the main `HtmlConverter` class
+ - Added additional test cases (#14)
+
+### Changed
+ - The `italic_style` option now defaults to `'*'` so that in-word emphasis is handled properly (#75)
+
+### Fixed
+ - Fixed several issues of `<code>` and `<pre>` tags not converting to blocks or inlines properly (#26, #70, #102, #140, #161, #162)
+ - Fixed in-word emphasis using underscores as delimiter (#75)
+ - Fixed character escaping inside of `<div>` elements
+ - Fixed header edge cases
+
+### Deprecated
+ - The `bold_style` and `italic_style` options have been deprecated (#75)
+
## [4.7.0] - 2018-05-19
### Added
- Added `setOptions()` function for chainable calling (#149)
@@ -217,7 +235,8 @@ not ideally set, so this releases fixes that. Moving forwards this should reduce
### Added
- Initial release
-[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.7.0...master
+[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.8.0...master
+[4.8.0]: https://github.com/thephpleague/html-to-markdown/compare/4.7.0...4.8.0
[4.7.0]: https://github.com/thephpleague/html-to-markdown/compare/4.6.2...4.7.0
[4.6.2]: https://github.com/thephpleague/html-to-markdown/compare/4.6.1...4.6.2
[4.6.1]: https://github.com/thephpleague/html-to-markdown/compare/4.6.0...4.6.1
diff --git a/vendor/league/html-to-markdown/composer.json b/vendor/league/html-to-markdown/composer.json
index c79230562..53403916e 100644
--- a/vendor/league/html-to-markdown/composer.json
+++ b/vendor/league/html-to-markdown/composer.json
@@ -9,7 +9,7 @@
{
"name": "Colin O'Dell",
"email": "colinodell@gmail.com",
- "homepage": "http://www.colinodell.com",
+ "homepage": "https://www.colinodell.com",
"role": "Lead Developer"
},
{
@@ -42,7 +42,7 @@
"bin": ["bin/html-to-markdown"],
"extra": {
"branch-alias": {
- "dev-master": "4.8-dev"
+ "dev-master": "4.9-dev"
}
}
}
diff --git a/vendor/league/html-to-markdown/src/Configuration.php b/vendor/league/html-to-markdown/src/Configuration.php
index 2943383aa..5bc8d5503 100644
--- a/vendor/league/html-to-markdown/src/Configuration.php
+++ b/vendor/league/html-to-markdown/src/Configuration.php
@@ -12,6 +12,8 @@ class Configuration
public function __construct(array $config = array())
{
$this->config = $config;
+
+ $this->checkForDeprecatedOptions($config);
}
/**
@@ -19,6 +21,7 @@ class Configuration
*/
public function merge(array $config = array())
{
+ $this->checkForDeprecatedOptions($config);
$this->config = array_replace_recursive($this->config, $config);
}
@@ -27,6 +30,7 @@ class Configuration
*/
public function replace(array $config = array())
{
+ $this->checkForDeprecatedOptions($config);
$this->config = $config;
}
@@ -36,6 +40,7 @@ class Configuration
*/
public function setOption($key, $value)
{
+ $this->checkForDeprecatedOptions(array($key => $value));
$this->config[$key] = $value;
}
@@ -57,4 +62,15 @@ class Configuration
return $this->config[$key];
}
+
+    /**
+     * Emit a silenced E_USER_DEPRECATED notice for every deprecated option
+     * in $config whose value differs from the library default.
+     *
+     * @param array $config Option name => value pairs about to be applied
+     */
+    private function checkForDeprecatedOptions(array $config)
+    {
+        // Deprecated option name => array(default value, notice to raise when customized)
+        $deprecations = array(
+            'bold_style' => array('**', 'Customizing the bold_style option is deprecated and may be removed in the next major version'),
+            'italic_style' => array('*', 'Customizing the italic_style option is deprecated and may be removed in the next major version'),
+        );
+
+        foreach ($config as $option => $setting) {
+            if (!isset($deprecations[$option])) {
+                continue;
+            }
+
+            list($default, $notice) = $deprecations[$option];
+
+            // Only a value that differs (strictly) from the default counts as a customization
+            if ($setting !== $default) {
+                @trigger_error($notice, E_USER_DEPRECATED);
+            }
+        }
+    }
}
diff --git a/vendor/league/html-to-markdown/src/Converter/CodeConverter.php b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php
index e536362ee..39e6a7bc4 100644
--- a/vendor/league/html-to-markdown/src/Converter/CodeConverter.php
+++ b/vendor/league/html-to-markdown/src/Converter/CodeConverter.php
@@ -13,7 +13,7 @@ class CodeConverter implements ConverterInterface
*/
public function convert(ElementInterface $element)
{
- $language = null;
+ $language = '';
// Checking for language class on the code block
$classes = $element->getAttribute('class');
@@ -24,8 +24,7 @@ class CodeConverter implements ConverterInterface
foreach ($classes as $class) {
if (strpos($class, 'language-') !== false) {
// Found one, save it as the selected language and stop looping over the classes.
- // The space after the language avoids gluing the actual code with the language tag
- $language = str_replace('language-', '', $class) . ' ';
+ $language = str_replace('language-', '', $class);
break;
}
}
@@ -39,14 +38,13 @@ class CodeConverter implements ConverterInterface
$code = preg_replace('/<code\b[^>]*>/', '', $code);
$code = str_replace('</code>', '', $code);
- // Checking if the code has multiple lines
- $lines = preg_split('/\r\n|\r|\n/', $code);
- if (count($lines) > 1) {
- // Multiple lines detected, adding three backticks and newlines
- $markdown .= '```' . $language . "\n" . $code . "\n" . '```' . "\n\n";
+ // Checking if it's a code block or span
+ if ($this->shouldBeBlock($element, $code)) {
+ // Code block detected, newlines will be added in parent
+ $markdown .= '```' . $language . "\n" . $code . "\n" . '```';
} else {
- // One line of code, wrapping it on one backtick.
- $markdown .= '`' . $language . $code . '`';
+ // One line of code, wrapping it on one backtick, removing new lines
+ $markdown .= '`' . preg_replace('/\r\n|\r|\n/', '', $code) . '`';
}
return $markdown;
@@ -59,4 +57,23 @@ class CodeConverter implements ConverterInterface
{
return array('code');
}
+
+    /**
+     * Decide whether the code should be rendered as a fenced block
+     * instead of an inline code span.
+     *
+     * @param ElementInterface $element The <code> element being converted
+     * @param string           $code    The element's contents with the <code> tags stripped
+     *
+     * @return bool True for a fenced block, false for an inline span
+     */
+    private function shouldBeBlock(ElementInterface $element, $code)
+    {
+        // A <code> nested directly in a <pre> is always a block. Other converters
+        // in this library treat getParent() as possibly empty, so guard before
+        // calling a method on it.
+        $parent = $element->getParent();
+        if ($parent && $parent->getTagName() === 'pre') {
+            return true;
+        }
+
+        // A non-whitespace character followed by "` `" in the code also forces a block.
+        // preg_match() yields 1 on a match, 0 or false otherwise.
+        return preg_match('/[^\s]` `/', $code) === 1;
+    }
}
diff --git a/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php
index d117e7d36..05d4fe81e 100644
--- a/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php
+++ b/vendor/league/html-to-markdown/src/Converter/HeaderConverter.php
@@ -34,6 +34,10 @@ class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface
$level = (int) substr($element->getTagName(), 1, 1);
$style = $this->config->getOption('header_style', self::STYLE_SETEXT);
+ if (strlen($element->getValue()) === 0) {
+ return '';
+ }
+
if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) {
return $this->createSetextHeader($level, $element->getValue());
}
diff --git a/vendor/league/html-to-markdown/src/Converter/LinkConverter.php b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php
index 74b49a778..c82b70e97 100644
--- a/vendor/league/html-to-markdown/src/Converter/LinkConverter.php
+++ b/vendor/league/html-to-markdown/src/Converter/LinkConverter.php
@@ -21,6 +21,8 @@ class LinkConverter implements ConverterInterface
$markdown = '[' . $text . '](' . $href . ' "' . $title . '")';
} elseif ($href === $text && $this->isValidAutolink($href)) {
$markdown = '<' . $href . '>';
+ } elseif ($href === 'mailto:' . $text && $this->isValidEmail($text)) {
+ $markdown = '<' . $text . '>';
} else {
$markdown = '[' . $text . '](' . $href . ')';
}
@@ -49,4 +51,15 @@ class LinkConverter implements ConverterInterface
{
return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1;
}
+
+    /**
+     * Check whether the given string is a valid email address
+     * according to PHP's FILTER_VALIDATE_EMAIL filter.
+     *
+     * @param string $email
+     *
+     * @return bool
+     */
+    private function isValidEmail($email)
+    {
+        // Email validation is messy business, but this should cover most cases.
+        // filter_var() returns the filtered string on success and false on failure,
+        // so compare against false to honour the documented bool return type.
+        return filter_var($email, FILTER_VALIDATE_EMAIL) !== false;
+    }
}
diff --git a/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php b/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php
index 3b77ba10b..321c898b1 100644
--- a/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php
+++ b/vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php
@@ -13,8 +13,6 @@ class PreformattedConverter implements ConverterInterface
*/
public function convert(ElementInterface $element)
{
- $markdown = '';
-
$pre_content = html_entity_decode($element->getChildrenAsString());
$pre_content = str_replace(array('<pre>', '</pre>'), '', $pre_content);
@@ -28,28 +26,22 @@ class PreformattedConverter implements ConverterInterface
$firstBacktick = strpos(trim($pre_content), '`');
$lastBacktick = strrpos(trim($pre_content), '`');
if ($firstBacktick === 0 && $lastBacktick === strlen(trim($pre_content)) - 1) {
- return $pre_content;
+ return $pre_content . "\n\n";
}
// If the execution reaches this point it means it's just a pre tag, with no code tag nested
// Empty lines are a special case
if ($pre_content === '') {
- return "```\n```\n";
+ return "```\n```\n\n";
}
// Normalizing new lines
- $pre_content = preg_replace('/\r\n|\r|\n/', PHP_EOL, $pre_content);
-
- // Is it a single line?
- if (strpos($pre_content, PHP_EOL) === false) {
- // One line of code, wrapping it on one backtick.
- return '`' . $pre_content . '`';
- }
+ $pre_content = preg_replace('/\r\n|\r|\n/', "\n", $pre_content);
// Ensure there's a newline at the end
- if (strrpos($pre_content, PHP_EOL) !== strlen($pre_content) - 1) {
- $pre_content .= PHP_EOL;
+ if (strrpos($pre_content, "\n") !== strlen($pre_content) - strlen("\n")) {
+ $pre_content .= "\n";
}
// Use three backticks
diff --git a/vendor/league/html-to-markdown/src/Converter/TextConverter.php b/vendor/league/html-to-markdown/src/Converter/TextConverter.php
index d6d91e16f..fcd466094 100644
--- a/vendor/league/html-to-markdown/src/Converter/TextConverter.php
+++ b/vendor/league/html-to-markdown/src/Converter/TextConverter.php
@@ -22,7 +22,9 @@ class TextConverter implements ConverterInterface
$markdown = preg_replace('~\s+~u', ' ', $markdown);
// Escape the following characters: '*', '_', '[', ']' and '\'
- $markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown);
+ if ($element->getParent() && $element->getParent()->getTagName() !== 'div') {
+ $markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown);
+ }
$markdown = preg_replace('~^#~u', '\\\\#', $markdown);
diff --git a/vendor/league/html-to-markdown/src/HtmlConverter.php b/vendor/league/html-to-markdown/src/HtmlConverter.php
index 155369f56..3381e1e1e 100644
--- a/vendor/league/html-to-markdown/src/HtmlConverter.php
+++ b/vendor/league/html-to-markdown/src/HtmlConverter.php
@@ -14,7 +14,7 @@ namespace League\HTMLToMarkdown;
*
* @license http://www.opensource.org/licenses/mit-license.php MIT
*/
-class HtmlConverter
+class HtmlConverter implements HtmlConverterInterface
{
/**
* @var Environment
@@ -35,8 +35,8 @@ class HtmlConverter
'header_style' => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2
'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
- 'bold_style' => '**', // Set to '__' if you prefer the underlined style
- 'italic_style' => '_', // Set to '*' if you prefer the asterisk style
+ 'bold_style' => '**', // DEPRECATED: Set to '__' if you prefer the underlined style
+ 'italic_style' => '*', // DEPRECATED: Set to '_' if you prefer the underlined style
'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
'hard_break' => false, // Set to true to turn <br> into `\n` instead of ` \n`
'list_item_style' => '-', // Set the default character for each <li> in a <ul>. Can be '-', '*', or '+'
diff --git a/vendor/league/html-to-markdown/src/HtmlConverterInterface.php b/vendor/league/html-to-markdown/src/HtmlConverterInterface.php
new file mode 100644
index 000000000..7d43cf87e
--- /dev/null
+++ b/vendor/league/html-to-markdown/src/HtmlConverterInterface.php
@@ -0,0 +1,26 @@
+<?php
+
+namespace League\HTMLToMarkdown;
+
+/**
+ * Interface for an HTML-to-Markdown converter.
+ *
+ * Declares the single conversion entry point; the library's main
+ * HtmlConverter class implements this interface.
+ *
+ * @author Colin O'Dell <colinodell@gmail.com>
+ *
+ * @link https://github.com/thephpleague/html-to-markdown/ Latest version on GitHub.
+ *
+ * @license http://www.opensource.org/licenses/mit-license.php MIT
+ */
+interface HtmlConverterInterface
+{
+ /**
+ * Convert the given $html to Markdown
+ *
+ * @param string $html The HTML markup to convert
+ *
+ * @throws \InvalidArgumentException
+ *
+ * @return string The Markdown version of the html
+ */
+ public function convert($html);
+}