From 6c79e0c077971029343b2dff30017571ea118438 Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Thu, 2 Mar 2017 23:25:04 +0100 Subject: :arrow_up: :hammer: Upgrade Markdownify library. The current version 2.0.0 (alpha) throws deprecated warning with PHP7.1 and PHPUnit. Upgrade the HTML to Markdown converter for PHP to the current Markdownify 2.2.1. Used composer to manage this library. --- vendor/pixel418/markdownify/src/ConverterExtra.php | 573 +++++++++++++++++++++ 1 file changed, 573 insertions(+) create mode 100644 vendor/pixel418/markdownify/src/ConverterExtra.php (limited to 'vendor/pixel418/markdownify/src/ConverterExtra.php') diff --git a/vendor/pixel418/markdownify/src/ConverterExtra.php b/vendor/pixel418/markdownify/src/ConverterExtra.php new file mode 100644 index 000000000..733955448 --- /dev/null +++ b/vendor/pixel418/markdownify/src/ConverterExtra.php @@ -0,0 +1,573 @@ +isMarkdownable['h1']['id'] = 'optional'; + $this->isMarkdownable['h1']['class'] = 'optional'; + $this->isMarkdownable['h2']['id'] = 'optional'; + $this->isMarkdownable['h2']['class'] = 'optional'; + $this->isMarkdownable['h3']['id'] = 'optional'; + $this->isMarkdownable['h3']['class'] = 'optional'; + $this->isMarkdownable['h4']['id'] = 'optional'; + $this->isMarkdownable['h4']['class'] = 'optional'; + $this->isMarkdownable['h5']['id'] = 'optional'; + $this->isMarkdownable['h5']['class'] = 'optional'; + $this->isMarkdownable['h6']['id'] = 'optional'; + $this->isMarkdownable['h6']['class'] = 'optional'; + // tables + $this->isMarkdownable['table'] = array(); + $this->isMarkdownable['th'] = array( + 'align' => 'optional', + ); + $this->isMarkdownable['td'] = array( + 'align' => 'optional', + ); + $this->isMarkdownable['tr'] = array(); + array_push($this->ignore, 'thead'); + array_push($this->ignore, 'tbody'); + array_push($this->ignore, 'tfoot'); + // definition lists + $this->isMarkdownable['dl'] = array(); + $this->isMarkdownable['dd'] = array(); + $this->isMarkdownable['dt'] = 
array(); + // link class + $this->isMarkdownable['a']['id'] = 'optional'; + $this->isMarkdownable['a']['class'] = 'optional'; + // footnotes + $this->isMarkdownable['fnref'] = array( + 'target' => 'required', + ); + $this->isMarkdownable['footnotes'] = array(); + $this->isMarkdownable['fn'] = array( + 'name' => 'required', + ); + $this->parser->blockElements['fnref'] = false; + $this->parser->blockElements['fn'] = true; + $this->parser->blockElements['footnotes'] = true; + // abbr + $this->isMarkdownable['abbr'] = array( + 'title' => 'required', + ); + // build RegEx lookahead to decide wether table can pe parsed or not + $inlineTags = array_keys($this->parser->blockElements, false); + $colContents = '(?:[^<]|<(?:' . implode('|', $inlineTags) . '|[^a-z]))*'; + $this->tableLookaheadHeader = '{ + ^\s*(?:)?\s* # open optional thead + \s*(?: # start required row with headers + # header with optional align + \s*' . $colContents . '\s* # contents + \s* # close header + )+ # close row with headers + \s*(?:)? # close optional thead + }sxi'; + $this->tdSubstitute = '\s*' . $colContents . '\s* # contents + \s*'; + $this->tableLookaheadBody = '{ + \s*(?:)?\s* # open optional tbody + (?:\s* # start row + %s # cols to be substituted + )+ # close row + \s*(?:)? # close optional tbody + \s* # close table + }sxi'; + } + + /** + * handle header tags (

-

)
     *
     * On the opening tag the current id/class selector is stored in the tag
     * attributes and the tag is stacked; on the closing tag a non-empty
     * selector is written as " {#id.class}". parent::handleHeader() is then
     * called in both cases.
     *
     * @param int $level 1-6
     * @return void
     */
    protected function handleHeader($level)
    {
        if ($this->parser->isStartTag) {
            $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector();
            $this->stack();
        } else {
            $tag = $this->unstack();
            if (!empty($tag['cssSelector'])) {
                // {#id.class}
                $this->out(' {' . $tag['cssSelector'] . '}');
            }
        }
        parent::handleHeader($level);
    }

    /**
     * handle <a> tags parsing
     *
     * Runs the parent parser step, then records the current id/class selector
     * in the tag attributes so handleTag_a_converter() can read it later.
     *
     * @param void
     * @return void
     */
    protected function handleTag_a_parser()
    {
        parent::handleTag_a_parser();
        $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector();
    }

    /**
     * handle <a> tags conversion
     *
     * Appends "{#id.class}" to whatever the parent converter produced when a
     * non-empty selector was recorded for the tag.
     *
     * @param array $tag
     * @param string $buffer
     * @return string The markdownified link
     */
    protected function handleTag_a_converter($tag, $buffer)
    {
        $output = parent::handleTag_a_converter($tag, $buffer);
        if (!empty($tag['cssSelector'])) {
            // [This link][id]{#id.class}
            $output .= '{' . $tag['cssSelector'] . '}';
        }

        return $output;
    }

    /**
     * handle <abbr> tags
     *
     * The abbreviation text is written inline; the tag itself is pushed onto
     * $this->stack['abbr'] (once per distinct text) so that
     * flushStacked_abbr() can emit its definition later.
     *
     * @param void
     * @return void
     */
    protected function handleTag_abbr()
    {
        if ($this->parser->isStartTag) {
            $this->stack();
            $this->buffer();
        } else {
            $tag = $this->unstack();
            $tag['text'] = $this->unbuffer();
            $add = true;
            foreach ($this->stack['abbr'] as $stacked) {
                // strict comparison: with "==" numeric-looking texts such as
                // "1e3" and "1000" would be treated as the same abbreviation
                if ($stacked['text'] === $tag['text']) {
                    /** TODO: differing abbr definitions, i.e. different titles for same text **/
                    $add = false;
                    break;
                }
            }
            $this->out($tag['text']);
            if ($add) {
                array_push($this->stack['abbr'], $tag);
            }
        }
    }

    /**
     * flush stacked abbr tags
     *
     * Writes one " *[text]: title" line for every stacked abbr that has not
     * been flagged 'unstacked' yet, and flags it so it is emitted only once.
     *
     * @param void
     * @return void
     */
    protected function flushStacked_abbr()
    {
        $out = array();
        foreach ($this->stack['abbr'] as $k => $tag) {
            if (!isset($tag['unstacked'])) {
                array_push($out, ' *[' . $tag['text'] . ']: ' . $tag['title']);
                $this->stack['abbr'][$k]['unstacked'] = true;
            }
        }
        if (!empty($out)) {
            $this->out("\n\n" . implode("\n", $out));
        }
    }

    /**
     * handle <table> tags
     *
     * Opening tag: when keepHTML is set, the upcoming markup is checked
     * against the look-ahead patterns and the table is only collected if it
     * matches; otherwise it is passed to handleTagToText(). When keepHTML is
     * not set, the table is always collected.
     * Closing tag: the collected rows are padded and written in Markdown Extra
     * table syntax (header row, separator row, body rows).
     *
     * @param void
     * @return void
     */
    protected function handleTag_table()
    {
        if ($this->parser->isStartTag) {
            // check if upcoming table can be converted
            if ($this->keepHTML) {
                if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
                    // header seems good, now check body
                    // get align & number of cols
                    // NOTE(review): this pattern and the '<td' ... '\s*>' wrapper below were
                    // stripped from the scraped source; they are reconstructed from the use of
                    // $cols[2] (align value) and of the numbered quote back-reference - confirm
                    // against the upstream Markdownify release.
                    preg_match_all('#<th(?:\s+align=("|\')(left|center|right)\1)?\s*>#si', $matches[0], $cols);
                    $regEx = '';
                    $i = 1;
                    $aligns = array();
                    foreach ($cols[2] as $align) {
                        $align = strtolower($align);
                        array_push($aligns, $align);
                        if (empty($align)) {
                            $align = 'left'; // default value
                        }
                        // each cell contributes one capture group (the quote), hence \$i
                        $td = '\s+align=("|\')' . $align . '\\' . $i;
                        $i++;
                        if ($align == 'left') {
                            // look for empty align or left
                            $td = '(?:' . $td . ')?';
                        }
                        $td = '<td' . $td . '\s*>';
                        $regEx .= $td . $this->tdSubstitute;
                    }
                    $regEx = sprintf($this->tableLookaheadBody, $regEx);
                    // flags must be an int: passing null is deprecated since PHP 8.1
                    if (preg_match($regEx, $this->parser->html, $matches, 0, strlen($matches[0]))) {
                        // this is a markdownable table tag!
                        $this->table = array(
                            'rows' => array(),
                            'col_widths' => array(),
                            'aligns' => $aligns,
                        );
                        $this->row = 0;
                    } else {
                        // non markdownable table
                        $this->handleTagToText();
                    }
                } else {
                    // non markdownable table
                    $this->handleTagToText();
                }
            } else {
                $this->table = array(
                    'rows' => array(),
                    'col_widths' => array(),
                    'aligns' => array(),
                );
                $this->row = 0;
            }
        } else {
            // finally build the table in Markdown Extra syntax
            $separator = array();
            if (!isset($this->table['aligns'])) {
                $this->table['aligns'] = array();
            }
            if (!isset($this->table['rows']) || !is_array($this->table['rows'])) {
                $this->table['rows'] = array();
            }
            // separator with correct align identifiers
            foreach ($this->table['aligns'] as $col => $align) {
                if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
                    break;
                }
                $left = ' ';
                $right = ' ';
                switch ($align) {
                    case 'left':
                        $left = ':';
                        break;
                    case 'center':
                        $left = ':';
                        $right = ':';
                        break;
                    case 'right':
                        $right = ':';
                        break;
                }
                array_push($separator, $left . str_repeat('-', $this->table['col_widths'][$col]) . $right);
            }
            $separator = '|' . implode('|', $separator) . '|';

            $rows = array();
            // add padding
            array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));
            $header = array_shift($this->table['rows']);
            // a table without any row has no header; implode(' | ', null) is a
            // TypeError on PHP 8, so nothing is written in that case
            if (is_array($header)) {
                array_push($rows, '| ' . implode(' | ', $header) . ' |');
                array_push($rows, $separator);
                foreach ($this->table['rows'] as $row) {
                    array_push($rows, '| ' . implode(' | ', $row) . ' |');
                }
                $this->out(implode("\n" . $this->indent, $rows));
            }
            $this->table = array();
            $this->setLineBreaks(2);
        }
    }

    /**
     * properly pad content so it is aligned as wished
     * should be used with array_walk_recursive on $this->table['rows']
     *
     * A column without a recorded alignment is registered as 'left'.
     *
     * @param string &$content cell text, padded in place
     * @param int $col column index (the leaf key supplied by array_walk_recursive)
     * @return void
     */
    protected function alignTdContent(&$content, $col)
    {
        if (!isset($this->table['aligns'][$col])) {
            $this->table['aligns'][$col] = 'left';
        }
        // never hand a negative count to str_repeat (ValueError on PHP 8)
        $paddingNeeded = max(0, $this->table['col_widths'][$col] - $this->strlen($content));
        switch ($this->table['aligns'][$col]) {
            case 'right':
                $content = str_repeat(' ', $paddingNeeded) . $content;
                break;
            case 'center':
                // floor() yields a float; str_repeat expects an int
                $left = (int) floor($paddingNeeded / 2);
                $right = $paddingNeeded - $left;
                $content = str_repeat(' ', $left) . $content . str_repeat(' ', $right);
                break;
            case 'left':
            default:
                $content .= str_repeat(' ', $paddingNeeded);
                break;
        }
    }

    /**
     * handle <tr> tags
     *
     * Resets the column counter when a row opens and advances the row counter
     * when it closes.
     *
     * @param void
     * @return void
     */
    protected function handleTag_tr()
    {
        if ($this->parser->isStartTag) {
            $this->col = -1;
        } else {
            $this->row++;
        }
    }

    /**
     * handle
tags
     *
     * Opening tag: moves on to the next column, makes sure a width entry
     * exists for it and starts buffering the cell output.
     * Closing tag: stores the trimmed cell text in the current row and widens
     * the column if this cell is longer than any seen so far.
     *
     * @param void
     * @return void
     */
    protected function handleTag_td()
    {
        if ($this->parser->isStartTag) {
            $column = ++$this->col;
            if (!isset($this->table['col_widths'][$column])) {
                $this->table['col_widths'][$column] = 0;
            }
            $this->buffer();

            return;
        }

        $cell = trim($this->unbuffer());
        $column = $this->col;
        $widest = isset($this->table['col_widths'][$column]) ? $this->table['col_widths'][$column] : 0;
        $this->table['col_widths'][$column] = max($widest, $this->strlen($cell));
        $this->table['rows'][$this->row][$column] = $cell;
    }

    /**
     * handle <th> tags
     *
     * Outside keepHTML mode, and only while the table has no row with index 1
     * yet, the alignment slot for column "current + 1" is filled once: with
     * the tag's align attribute if present, otherwise with an empty string.
     * The cell itself is then processed by handleTag_td().
     *
     * @param void
     * @return void
     */
    protected function handleTag_th()
    {
        $upcoming = $this->col + 1;
        $needsAlign = !$this->keepHTML
            && !isset($this->table['rows'][1])
            && !isset($this->table['aligns'][$upcoming]);

        if ($needsAlign) {
            $this->table['aligns'][$upcoming] = isset($this->parser->tagAttributes['align'])
                ? $this->parser->tagAttributes['align']
                : '';
        }

        $this->handleTag_td();
    }

    /**
     * handle
tags
     *
     * Nothing happens on the opening tag; the closing tag requests two line
     * breaks via setLineBreaks().
     *
     * @param void
     * @return void
     */
    protected function handleTag_dl()
    {
        if ($this->parser->isStartTag) {
            return;
        }

        $this->setLineBreaks(2);
    }

    /**
     * handle
tags
     *
     * Nothing happens on the opening tag; the closing tag requests a single
     * line break via setLineBreaks().
     *
     * @param void
     * @return void
     **/
    protected function handleTag_dt()
    {
        if ($this->parser->isStartTag) {
            return;
        }

        $this->setLineBreaks(1);
    }

    /**
     * handle
tags
     *
     * Opening tag: writes the ": " definition marker (preceded by an extra
     * line when the definition starts with a paragraph) and calls indent().
     * Closing tag: requests two line breaks when a <dt> follows, one
     * otherwise, and calls indent() again.
     *
     * NOTE(review): the tag literals compared against ('<p>', '<dt>') were
     * stripped from the scraped source and are restored from the substr()
     * lengths and the neighbouring comments. The widths of the ': ' marker and
     * of the indent strings in this file may have been whitespace-collapsed by
     * the same scrape - confirm against the upstream Markdownify release.
     *
     * @param void
     * @return void
     */
    protected function handleTag_dd()
    {
        if ($this->parser->isStartTag) {
            if (substr(ltrim($this->parser->html), 0, 3) == '<p>') {
                // next comes a paragraph, so we'll need an extra line
                $this->out("\n" . $this->indent);
            } elseif (substr($this->output, -2) == "\n\n") {
                $this->output = substr($this->output, 0, -1);
            }
            $this->out(': ');
            $this->indent(' ', false);
        } else {
            // lookahead for next dt
            if (substr(ltrim($this->parser->html), 0, 4) == '<dt>') {
                $this->setLineBreaks(2);
            } else {
                $this->setLineBreaks(1);
            }
            $this->indent(' ');
        }
    }

    /**
     * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
     *
     * Writes the reference as "[^target]".
     *
     * @param void
     * @return void
     */
    protected function handleTag_fnref()
    {
        $this->out('[^' . $this->parser->tagAttributes['target'] . ']');
    }

    /**
     * handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
     * and markdownify_extra::_makeFootnotes())
     *
     * Opening tag: writes "[^name]:" and requests one line break.
     * Closing tag: requests two line breaks.
     * indent() is called in both cases.
     *
     * @param void
     * @return void
     */
    protected function handleTag_fn()
    {
        if ($this->parser->isStartTag) {
            $this->out('[^' . $this->parser->tagAttributes['name'] . ']:');
            $this->setLineBreaks(1);
        } else {
            $this->setLineBreaks(2);
        }
        $this->indent(' ');
    }

    /**
     * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
     * and markdownify_extra::_makeFootnotes())
     *
     * Only the closing tag has an effect: it requests two line breaks.
     *
     * @param void
     * @return void
     */
    protected function handleTag_footnotes()
    {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(2);
        }
    }

    /**
     * parse a HTML string, clean up footnotes prior
     *
     * Footnote references and the footnote list are first rewritten into the
     * custom <fnref />, <footnotes> and <fn> elements registered by this
     * class; the result is handed to parent::parseString().
     *
     * NOTE(review): the HTML inside both patterns and the first replacement
     * was stripped from the scraped source. It is reconstructed so that the
     * remaining fragments (\s*, \d+, (.+), delimiters, modifiers) line up and
     * so that the produced attributes match the 'target'/'name' attributes
     * this class registers as required - confirm against the upstream
     * Markdownify release.
     *
     * @param string $html input
     * @return string Markdown formatted output
     */
    public function parseString($html)
    {
        /** TODO: custom markdown-extra options, e.g. titles & classes **/
        // <sup id="fnref:..."><a href="#fn:..." rel="footnote">...</a></sup>
        // => <fnref target="..." />
        $html = preg_replace(
            '@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us',
            '<fnref target="$1" />',
            $html
        );
        // <div class="footnotes">
        // <hr />
        // <ol>
        //
        // <li id="fn:...">...</li>
        // ...
        //
        // </ol>
        // </div>
        // =>
        // <footnotes>
        //   <fn name="...">...</fn>
        //   ...
        // </footnotes>
        $html = preg_replace_callback(
            '#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us',
            array($this, '_makeFootnotes'),
            $html
        );

        return parent::parseString($html);
    }

    /**
     * replace HTML representation of footnotes with something more easily parsable
     *
     * @note this is a callback to be used in parseString()
     *
     * NOTE(review): as in parseString(), the tag text inside the literals was
     * stripped from the scraped source and is reconstructed here - confirm
     * against the upstream Markdownify release.
     *
     * @param array $matches $matches[1] holds the <li> items of the footnote list
     * @return string the items wrapped as <footnotes><fn name="...">...</fn></footnotes>
     */
    protected function _makeFootnotes($matches)
    {
        // <li id="fn:1">
        //   ...
        //   <a href="#fnref:block" rev="footnote">&#8617;</a></p>
        // </li>
        // => <fn name="1">...</fn>
        // remove footnote link
        $fns = preg_replace('@\s*(&#160;\s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>&#8617;</a>\s*@s', '', $matches[1]);
        // remove empty paragraph
        $fns = preg_replace('@<p>\s*</p>@s', '', $fns);
        // <li id="fn:1">...</li> -> <fn name="1">...</fn>
        $fns = str_replace('<li id="fn:', '<fn name="', $fns);

        $fns = '<footnotes>' . $fns . '</footnotes>';

        // the look-ahead has no capturing group, so $1 expands to an empty string
        return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>$1', $fns);
    }

    /**
     * build the Markdown Extra selector ("#id.class1.class2") for the current tag
     *
     * Reads the 'id' and 'class' attributes of the tag the parser is currently
     * on. Empty attributes contribute nothing, and class names may be
     * separated by any run of whitespace.
     *
     * @param void
     * @return string the selector, or an empty string when neither attribute is usable
     */
    protected function getCurrentCssSelector()
    {
        $cssSelector = '';
        if (isset($this->parser->tagAttributes['id'])) {
            $id = $this->decode($this->parser->tagAttributes['id']);
            // id="" must not produce a lone "#"
            if ($id !== '') {
                $cssSelector .= '#' . $id;
            }
        }
        if (isset($this->parser->tagAttributes['class'])) {
            $classes = preg_split('/\s+/', $this->decode($this->parser->tagAttributes['class']), -1, PREG_SPLIT_NO_EMPTY);
            // class="" (or whitespace only) must not produce a lone "."
            if (!empty($classes)) {
                $cssSelector .= '.' . implode('.', $classes);
            }
        }

        return $cssSelector;
    }
}
-- cgit v1.2.3