diff options
author | Klaus Weidenbach <Klaus.Weidenbach@gmx.net> | 2017-03-02 23:25:04 +0100 |
---|---|---|
committer | Klaus Weidenbach <Klaus.Weidenbach@gmx.net> | 2017-03-05 01:14:15 +0100 |
commit | 6c79e0c077971029343b2dff30017571ea118438 (patch) | |
tree | 26809ee07eeee05240878bd08cfb4fdcf4bb450a /vendor/pixel418/markdownify/src/ConverterExtra.php | |
parent | 8e1716065ee01959fc799fa14ba627392a876afa (diff) | |
download | volse-hubzilla-6c79e0c077971029343b2dff30017571ea118438.tar.gz volse-hubzilla-6c79e0c077971029343b2dff30017571ea118438.tar.bz2 volse-hubzilla-6c79e0c077971029343b2dff30017571ea118438.zip |
:arrow_up: :hammer: Upgrade Markdownify library.
The current version 2.0.0 (alpha) throws deprecated warning with
PHP7.1 and PHPUnit.
Upgrade the HTML to Markdown converter for PHP to the current
Markdownify 2.2.1.
Used composer to manage this library.
Diffstat (limited to 'vendor/pixel418/markdownify/src/ConverterExtra.php')
-rw-r--r-- | vendor/pixel418/markdownify/src/ConverterExtra.php | 573 |
1 files changed, 573 insertions, 0 deletions
diff --git a/vendor/pixel418/markdownify/src/ConverterExtra.php b/vendor/pixel418/markdownify/src/ConverterExtra.php new file mode 100644 index 000000000..733955448 --- /dev/null +++ b/vendor/pixel418/markdownify/src/ConverterExtra.php @@ -0,0 +1,573 @@ +<?php + +/* This file is part of the Markdownify project, which is under LGPL license */ + +namespace Markdownify; + +class ConverterExtra extends Converter +{ + + /** + * table data, including rows with content and the maximum width of each col + * + * @var array + */ + protected $table = array(); + + /** + * current col + * + * @var int + */ + protected $col = -1; + + /** + * current row + * + * @var int + */ + protected $row = 0; + + /** + * constructor, see Markdownify::Markdownify() for more information + */ + public function __construct($linksAfterEachParagraph = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) + { + parent::__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML); + + // new markdownable tags & attributes + // header ids: # foo {bar} + $this->isMarkdownable['h1']['id'] = 'optional'; + $this->isMarkdownable['h1']['class'] = 'optional'; + $this->isMarkdownable['h2']['id'] = 'optional'; + $this->isMarkdownable['h2']['class'] = 'optional'; + $this->isMarkdownable['h3']['id'] = 'optional'; + $this->isMarkdownable['h3']['class'] = 'optional'; + $this->isMarkdownable['h4']['id'] = 'optional'; + $this->isMarkdownable['h4']['class'] = 'optional'; + $this->isMarkdownable['h5']['id'] = 'optional'; + $this->isMarkdownable['h5']['class'] = 'optional'; + $this->isMarkdownable['h6']['id'] = 'optional'; + $this->isMarkdownable['h6']['class'] = 'optional'; + // tables + $this->isMarkdownable['table'] = array(); + $this->isMarkdownable['th'] = array( + 'align' => 'optional', + ); + $this->isMarkdownable['td'] = array( + 'align' => 'optional', + ); + $this->isMarkdownable['tr'] = array(); + array_push($this->ignore, 'thead'); + array_push($this->ignore, 'tbody'); + array_push($this->ignore, 'tfoot'); + // definition lists + $this->isMarkdownable['dl'] = array(); + $this->isMarkdownable['dd'] = array(); + $this->isMarkdownable['dt'] = array(); + // link class + $this->isMarkdownable['a']['id'] = 'optional'; + $this->isMarkdownable['a']['class'] = 'optional'; + // footnotes + $this->isMarkdownable['fnref'] = array( + 'target' => 'required', + ); + $this->isMarkdownable['footnotes'] = array(); + $this->isMarkdownable['fn'] = array( + 'name' => 'required', + ); + $this->parser->blockElements['fnref'] = false; + $this->parser->blockElements['fn'] = true; + $this->parser->blockElements['footnotes'] = true; + // abbr + $this->isMarkdownable['abbr'] = array( + 'title' => 'required', + ); + // build RegEx lookahead to decide wether table can pe parsed or not + $inlineTags = array_keys($this->parser->blockElements, false); + $colContents = '(?:[^<]|<(?:' . implode('|', $inlineTags) . '|[^a-z]))*'; + $this->tableLookaheadHeader = '{ + ^\s*(?:<thead\s*>)?\s* # open optional thead + <tr\s*>\s*(?: # start required row with headers + <th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align + \s*' . $colContents . '\s* # contents + </th>\s* # close header + )+</tr> # close row with headers + \s*(?:</thead>)? # close optional thead + }sxi'; + $this->tdSubstitute = '\s*' . $colContents . '\s* # contents + </td>\s*'; + $this->tableLookaheadBody = '{ + \s*(?:<tbody\s*>)?\s* # open optional tbody + (?:<tr\s*>\s* # start row + %s # cols to be substituted + </tr>)+ # close row + \s*(?:</tbody>)? # close optional tbody + \s*</table> # close table + }sxi'; + } + + /** + * handle header tags (<h1> - <h6>) + * + * @param int $level 1-6 + * @return void + */ + protected function handleHeader($level) + { + if ($this->parser->isStartTag) { + $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector(); + $this->stack(); + } else { + $tag = $this->unstack(); + if (!empty($tag['cssSelector'])) { + // {#id.class} + $this->out(' {' . $tag['cssSelector'] . '}'); + } + } + parent::handleHeader($level); + } + + /** + * handle <a> tags parsing + * + * @param void + * @return void + */ + protected function handleTag_a_parser() + { + parent::handleTag_a_parser(); + $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector(); + } + + /** + * handle <a> tags conversion + * + * @param array $tag + * @param string $buffer + * @return string The markdownified link + */ + protected function handleTag_a_converter($tag, $buffer) + { + $output = parent::handleTag_a_converter($tag, $buffer); + if (!empty($tag['cssSelector'])) { + // [This link][id]{#id.class} + $output .= '{' . $tag['cssSelector'] . '}'; + } + + return $output; + } + + /** + * handle <abbr> tags + * + * @param void + * @return void + */ + protected function handleTag_abbr() + { + if ($this->parser->isStartTag) { + $this->stack(); + $this->buffer(); + } else { + $tag = $this->unstack(); + $tag['text'] = $this->unbuffer(); + $add = true; + foreach ($this->stack['abbr'] as $stacked) { + if ($stacked['text'] == $tag['text']) { + /** TODO: differing abbr definitions, i.e. different titles for same text **/ + $add = false; + break; + } + } + $this->out($tag['text']); + if ($add) { + array_push($this->stack['abbr'], $tag); + } + } + } + + /** + * flush stacked abbr tags + * + * @param void + * @return void + */ + protected function flushStacked_abbr() + { + $out = array(); + foreach ($this->stack['abbr'] as $k => $tag) { + if (!isset($tag['unstacked'])) { + array_push($out, ' *[' . $tag['text'] . ']: ' . $tag['title']); + $tag['unstacked'] = true; + $this->stack['abbr'][$k] = $tag; + } + } + if (!empty($out)) { + $this->out("\n\n" . implode("\n", $out)); + } + } + + /** + * handle <table> tags + * + * @param void + * @return void + */ + protected function handleTag_table() + { + if ($this->parser->isStartTag) { + // check if upcoming table can be converted + if ($this->keepHTML) { + if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) { + // header seems good, now check body + // get align & number of cols + preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols); + $regEx = ''; + $i = 1; + $aligns = array(); + foreach ($cols[2] as $align) { + $align = strtolower($align); + array_push($aligns, $align); + if (empty($align)) { + $align = 'left'; // default value + } + $td = '\s+align=("|\')' . $align . '\\' . $i; + $i++; + if ($align == 'left') { + // look for empty align or left + $td = '(?:' . $td . ')?'; + } + $td = '<td' . $td . '\s*>'; + $regEx .= $td . $this->tdSubstitute; + } + $regEx = sprintf($this->tableLookaheadBody, $regEx); + if (preg_match($regEx, $this->parser->html, $matches, null, strlen($matches[0]))) { + // this is a markdownable table tag! + $this->table = array( + 'rows' => array(), + 'col_widths' => array(), + 'aligns' => $aligns, + ); + $this->row = 0; + } else { + // non markdownable table + $this->handleTagToText(); + } + } else { + // non markdownable table + $this->handleTagToText(); + } + } else { + $this->table = array( + 'rows' => array(), + 'col_widths' => array(), + 'aligns' => array(), + ); + $this->row = 0; + } + } else { + // finally build the table in Markdown Extra syntax + $separator = array(); + if (!isset($this->table['aligns'])) { + $this->table['aligns'] = array(); + } + // seperator with correct align identifiers + foreach ($this->table['aligns'] as $col => $align) { + if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) { + break; + } + $left = ' '; + $right = ' '; + switch ($align) { + case 'left': + $left = ':'; + break; + case 'center': + $right = ':'; + $left = ':'; + case 'right': + $right = ':'; + break; + } + array_push($separator, $left . str_repeat('-', $this->table['col_widths'][$col]) . $right); + } + $separator = '|' . implode('|', $separator) . '|'; + + $rows = array(); + // add padding + array_walk_recursive($this->table['rows'], array(&$this, 'alignTdContent')); + $header = array_shift($this->table['rows']); + array_push($rows, '| ' . implode(' | ', $header) . ' |'); + array_push($rows, $separator); + foreach ($this->table['rows'] as $row) { + array_push($rows, '| ' . implode(' | ', $row) . ' |'); + } + $this->out(implode("\n" . $this->indent, $rows)); + $this->table = array(); + $this->setLineBreaks(2); + } + } + + /** + * properly pad content so it is aligned as whished + * should be used with array_walk_recursive on $this->table['rows'] + * + * @param string &$content + * @param int $col + * @return void + */ + protected function alignTdContent(&$content, $col) + { + if (!isset($this->table['aligns'][$col])) { + $this->table['aligns'][$col] = 'left'; + } + switch ($this->table['aligns'][$col]) { + default: + case 'left': + $content .= str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content)); + break; + case 'right': + $content = str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content)) . $content; + break; + case 'center': + $paddingNeeded = $this->table['col_widths'][$col] - $this->strlen($content); + $left = floor($paddingNeeded / 2); + $right = $paddingNeeded - $left; + $content = str_repeat(' ', $left) . $content . str_repeat(' ', $right); + break; + } + } + + /** + * handle <tr> tags + * + * @param void + * @return void + */ + protected function handleTag_tr() + { + if ($this->parser->isStartTag) { + $this->col = -1; + } else { + $this->row++; + } + } + + /** + * handle <td> tags + * + * @param void + * @return void + */ + protected function handleTag_td() + { + if ($this->parser->isStartTag) { + $this->col++; + if (!isset($this->table['col_widths'][$this->col])) { + $this->table['col_widths'][$this->col] = 0; + } + $this->buffer(); + } else { + $buffer = trim($this->unbuffer()); + if (!isset($this->table['col_widths'][$this->col])) { + $this->table['col_widths'][$this->col] = 0; + } + $this->table['col_widths'][$this->col] = max($this->table['col_widths'][$this->col], $this->strlen($buffer)); + $this->table['rows'][$this->row][$this->col] = $buffer; + } + } + + /** + * handle <th> tags + * + * @param void + * @return void + */ + protected function handleTag_th() + { + if (!$this->keepHTML && !isset($this->table['rows'][1]) && !isset($this->table['aligns'][$this->col + 1])) { + if (isset($this->parser->tagAttributes['align'])) { + $this->table['aligns'][$this->col + 1] = $this->parser->tagAttributes['align']; + } else { + $this->table['aligns'][$this->col + 1] = ''; + } + } + $this->handleTag_td(); + } + + /** + * handle <dl> tags + * + * @param void + * @return void + */ + protected function handleTag_dl() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(2); + } + } + + /** + * handle <dt> tags + * + * @param void + * @return void + **/ + protected function handleTag_dt() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(1); + } + } + + /** + * handle <dd> tags + * + * @param void + * @return void + */ + protected function handleTag_dd() + { + if ($this->parser->isStartTag) { + if (substr(ltrim($this->parser->html), 0, 3) == '<p>') { + // next comes a paragraph, so we'll need an extra line + $this->out("\n" . $this->indent); + } elseif (substr($this->output, -2) == "\n\n") { + $this->output = substr($this->output, 0, -1); + } + $this->out(': '); + $this->indent(' ', false); + } else { + // lookahead for next dt + if (substr(ltrim($this->parser->html), 0, 4) == '<dt>') { + $this->setLineBreaks(2); + } else { + $this->setLineBreaks(1); + } + $this->indent(' '); + } + } + + /** + * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString()) + * + * @param void + * @return void + */ + protected function handleTag_fnref() + { + $this->out('[^' . $this->parser->tagAttributes['target'] . ']'); + } + + /** + * handle <fn> tags (custom footnotes, see markdownify_extra::parseString() + * and markdownify_extra::_makeFootnotes()) + * + * @param void + * @return void + */ + protected function handleTag_fn() + { + if ($this->parser->isStartTag) { + $this->out('[^' . $this->parser->tagAttributes['name'] . ']:'); + $this->setLineBreaks(1); + } else { + $this->setLineBreaks(2); + } + $this->indent(' '); + } + + /** + * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString() + * and markdownify_extra::_makeFootnotes()) + * + * @param void + * @return void + */ + protected function handleTag_footnotes() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(2); + } + } + + /** + * parse a HTML string, clean up footnotes prior + * + * @param string $HTML input + * @return string Markdown formatted output + */ + public function parseString($html) + { + /** TODO: custom markdown-extra options, e.g. titles & classes **/ + // <sup id="fnref:..."><a href"#fn..." rel="footnote">...</a></sup> + // => <fnref target="..." /> + $html = preg_replace('@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us', '<fnref target="$1" />', $html); + // <div class="footnotes"> + // <hr /> + // <ol> + // + // <li id="fn:...">...</li> + // ... + // + // </ol> + // </div> + // => + // <footnotes> + // <fn name="...">...</fn> + // ... + // </footnotes> + $html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us', array(&$this, '_makeFootnotes'), $html); + + return parent::parseString($html); + } + + /** + * replace HTML representation of footnotes with something more easily parsable + * + * @note this is a callback to be used in parseString() + * + * @param array $matches + * @return string + */ + protected function _makeFootnotes($matches) + { + // <li id="fn:1"> + // ... + // <a href="#fnref:block" rev="footnote">↩</a></p> + // </li> + // => <fn name="1">...</fn> + // remove footnote link + $fns = preg_replace('@\s*( \s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>↩</a>\s*@s', '', $matches[1]); + // remove empty paragraph + $fns = preg_replace('@<p>\s*</p>@s', '', $fns); + // <li id="fn:1">...</li> -> <footnote nr="1">...</footnote> + $fns = str_replace('<li id="fn:', '<fn name="', $fns); + + $fns = '<footnotes>' . $fns . '</footnotes>'; + + return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>$1', $fns); + } + + /** + * handle <a> tags parsing + * + * @param void + * @return void + */ + protected function getCurrentCssSelector() + { + $cssSelector = ''; + if (isset($this->parser->tagAttributes['id'])) { + $cssSelector .= '#' . $this->decode($this->parser->tagAttributes['id']); + } + if (isset($this->parser->tagAttributes['class'])) { + $classes = explode(' ', $this->decode($this->parser->tagAttributes['class'])); + $classes = array_filter($classes); + $cssSelector .= '.' . join('.', $classes); + } + return $cssSelector; + } +} |