diff options
author | zotlabs <mike@macgirvin.com> | 2017-03-05 18:22:37 +1100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-05 18:22:37 +1100 |
commit | 215bd07f0b23c03f8af0be7bee1d393f1821087d (patch) | |
tree | 26809ee07eeee05240878bd08cfb4fdcf4bb450a /vendor/pixel418/markdownify/src/ConverterExtra.php | |
parent | 8e1716065ee01959fc799fa14ba627392a876afa (diff) | |
parent | 6c79e0c077971029343b2dff30017571ea118438 (diff) | |
download | volse-hubzilla-215bd07f0b23c03f8af0be7bee1d393f1821087d.tar.gz volse-hubzilla-215bd07f0b23c03f8af0be7bee1d393f1821087d.tar.bz2 volse-hubzilla-215bd07f0b23c03f8af0be7bee1d393f1821087d.zip |
Merge pull request #688 from dawnbreak/markdown
:arrow_up: :hammer: Upgrade Markdownify library.
Diffstat (limited to 'vendor/pixel418/markdownify/src/ConverterExtra.php')
-rw-r--r-- | vendor/pixel418/markdownify/src/ConverterExtra.php | 573 |
1 files changed, 573 insertions, 0 deletions
diff --git a/vendor/pixel418/markdownify/src/ConverterExtra.php b/vendor/pixel418/markdownify/src/ConverterExtra.php new file mode 100644 index 000000000..733955448 --- /dev/null +++ b/vendor/pixel418/markdownify/src/ConverterExtra.php @@ -0,0 +1,573 @@ +<?php + +/* This file is part of the Markdownify project, which is under LGPL license */ + +namespace Markdownify; + +class ConverterExtra extends Converter +{ + + /** + * table data, including rows with content and the maximum width of each col + * + * @var array + */ + protected $table = array(); + + /** + * current col + * + * @var int + */ + protected $col = -1; + + /** + * current row + * + * @var int + */ + protected $row = 0; + + /** + * constructor, see Markdownify::Markdownify() for more information + */ + public function __construct($linksAfterEachParagraph = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) + { + parent::__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML); + + // new markdownable tags & attributes + // header ids: # foo {bar} + $this->isMarkdownable['h1']['id'] = 'optional'; + $this->isMarkdownable['h1']['class'] = 'optional'; + $this->isMarkdownable['h2']['id'] = 'optional'; + $this->isMarkdownable['h2']['class'] = 'optional'; + $this->isMarkdownable['h3']['id'] = 'optional'; + $this->isMarkdownable['h3']['class'] = 'optional'; + $this->isMarkdownable['h4']['id'] = 'optional'; + $this->isMarkdownable['h4']['class'] = 'optional'; + $this->isMarkdownable['h5']['id'] = 'optional'; + $this->isMarkdownable['h5']['class'] = 'optional'; + $this->isMarkdownable['h6']['id'] = 'optional'; + $this->isMarkdownable['h6']['class'] = 'optional'; + // tables + $this->isMarkdownable['table'] = array(); + $this->isMarkdownable['th'] = array( + 'align' => 'optional', + ); + $this->isMarkdownable['td'] = array( + 'align' => 'optional', + ); + $this->isMarkdownable['tr'] = array(); + array_push($this->ignore, 'thead'); + array_push($this->ignore, 'tbody'); + array_push($this->ignore, 'tfoot'); + // definition lists + $this->isMarkdownable['dl'] = array(); + $this->isMarkdownable['dd'] = array(); + $this->isMarkdownable['dt'] = array(); + // link class + $this->isMarkdownable['a']['id'] = 'optional'; + $this->isMarkdownable['a']['class'] = 'optional'; + // footnotes + $this->isMarkdownable['fnref'] = array( + 'target' => 'required', + ); + $this->isMarkdownable['footnotes'] = array(); + $this->isMarkdownable['fn'] = array( + 'name' => 'required', + ); + $this->parser->blockElements['fnref'] = false; + $this->parser->blockElements['fn'] = true; + $this->parser->blockElements['footnotes'] = true; + // abbr + $this->isMarkdownable['abbr'] = array( + 'title' => 'required', + ); + // build RegEx lookahead to decide wether table can pe parsed or not + $inlineTags = array_keys($this->parser->blockElements, false); + $colContents = '(?:[^<]|<(?:' . implode('|', $inlineTags) . '|[^a-z]))*'; + $this->tableLookaheadHeader = '{ + ^\s*(?:<thead\s*>)?\s* # open optional thead + <tr\s*>\s*(?: # start required row with headers + <th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align + \s*' . $colContents . '\s* # contents + </th>\s* # close header + )+</tr> # close row with headers + \s*(?:</thead>)? # close optional thead + }sxi'; + $this->tdSubstitute = '\s*' . $colContents . '\s* # contents + </td>\s*'; + $this->tableLookaheadBody = '{ + \s*(?:<tbody\s*>)?\s* # open optional tbody + (?:<tr\s*>\s* # start row + %s # cols to be substituted + </tr>)+ # close row + \s*(?:</tbody>)? # close optional tbody + \s*</table> # close table + }sxi'; + } + + /** + * handle header tags (<h1> - <h6>) + * + * @param int $level 1-6 + * @return void + */ + protected function handleHeader($level) + { + if ($this->parser->isStartTag) { + $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector(); + $this->stack(); + } else { + $tag = $this->unstack(); + if (!empty($tag['cssSelector'])) { + // {#id.class} + $this->out(' {' . $tag['cssSelector'] . '}'); + } + } + parent::handleHeader($level); + } + + /** + * handle <a> tags parsing + * + * @param void + * @return void + */ + protected function handleTag_a_parser() + { + parent::handleTag_a_parser(); + $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector(); + } + + /** + * handle <a> tags conversion + * + * @param array $tag + * @param string $buffer + * @return string The markdownified link + */ + protected function handleTag_a_converter($tag, $buffer) + { + $output = parent::handleTag_a_converter($tag, $buffer); + if (!empty($tag['cssSelector'])) { + // [This link][id]{#id.class} + $output .= '{' . $tag['cssSelector'] . '}'; + } + + return $output; + } + + /** + * handle <abbr> tags + * + * @param void + * @return void + */ + protected function handleTag_abbr() + { + if ($this->parser->isStartTag) { + $this->stack(); + $this->buffer(); + } else { + $tag = $this->unstack(); + $tag['text'] = $this->unbuffer(); + $add = true; + foreach ($this->stack['abbr'] as $stacked) { + if ($stacked['text'] == $tag['text']) { + /** TODO: differing abbr definitions, i.e. different titles for same text **/ + $add = false; + break; + } + } + $this->out($tag['text']); + if ($add) { + array_push($this->stack['abbr'], $tag); + } + } + } + + /** + * flush stacked abbr tags + * + * @param void + * @return void + */ + protected function flushStacked_abbr() + { + $out = array(); + foreach ($this->stack['abbr'] as $k => $tag) { + if (!isset($tag['unstacked'])) { + array_push($out, ' *[' . $tag['text'] . ']: ' . $tag['title']); + $tag['unstacked'] = true; + $this->stack['abbr'][$k] = $tag; + } + } + if (!empty($out)) { + $this->out("\n\n" . implode("\n", $out)); + } + } + + /** + * handle <table> tags + * + * @param void + * @return void + */ + protected function handleTag_table() + { + if ($this->parser->isStartTag) { + // check if upcoming table can be converted + if ($this->keepHTML) { + if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) { + // header seems good, now check body + // get align & number of cols + preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols); + $regEx = ''; + $i = 1; + $aligns = array(); + foreach ($cols[2] as $align) { + $align = strtolower($align); + array_push($aligns, $align); + if (empty($align)) { + $align = 'left'; // default value + } + $td = '\s+align=("|\')' . $align . '\\' . $i; + $i++; + if ($align == 'left') { + // look for empty align or left + $td = '(?:' . $td . ')?'; + } + $td = '<td' . $td . '\s*>'; + $regEx .= $td . $this->tdSubstitute; + } + $regEx = sprintf($this->tableLookaheadBody, $regEx); + if (preg_match($regEx, $this->parser->html, $matches, null, strlen($matches[0]))) { + // this is a markdownable table tag! + $this->table = array( + 'rows' => array(), + 'col_widths' => array(), + 'aligns' => $aligns, + ); + $this->row = 0; + } else { + // non markdownable table + $this->handleTagToText(); + } + } else { + // non markdownable table + $this->handleTagToText(); + } + } else { + $this->table = array( + 'rows' => array(), + 'col_widths' => array(), + 'aligns' => array(), + ); + $this->row = 0; + } + } else { + // finally build the table in Markdown Extra syntax + $separator = array(); + if (!isset($this->table['aligns'])) { + $this->table['aligns'] = array(); + } + // seperator with correct align identifiers + foreach ($this->table['aligns'] as $col => $align) { + if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) { + break; + } + $left = ' '; + $right = ' '; + switch ($align) { + case 'left': + $left = ':'; + break; + case 'center': + $right = ':'; + $left = ':'; + case 'right': + $right = ':'; + break; + } + array_push($separator, $left . str_repeat('-', $this->table['col_widths'][$col]) . $right); + } + $separator = '|' . implode('|', $separator) . '|'; + + $rows = array(); + // add padding + array_walk_recursive($this->table['rows'], array(&$this, 'alignTdContent')); + $header = array_shift($this->table['rows']); + array_push($rows, '| ' . implode(' | ', $header) . ' |'); + array_push($rows, $separator); + foreach ($this->table['rows'] as $row) { + array_push($rows, '| ' . implode(' | ', $row) . ' |'); + } + $this->out(implode("\n" . $this->indent, $rows)); + $this->table = array(); + $this->setLineBreaks(2); + } + } + + /** + * properly pad content so it is aligned as whished + * should be used with array_walk_recursive on $this->table['rows'] + * + * @param string &$content + * @param int $col + * @return void + */ + protected function alignTdContent(&$content, $col) + { + if (!isset($this->table['aligns'][$col])) { + $this->table['aligns'][$col] = 'left'; + } + switch ($this->table['aligns'][$col]) { + default: + case 'left': + $content .= str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content)); + break; + case 'right': + $content = str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content)) . $content; + break; + case 'center': + $paddingNeeded = $this->table['col_widths'][$col] - $this->strlen($content); + $left = floor($paddingNeeded / 2); + $right = $paddingNeeded - $left; + $content = str_repeat(' ', $left) . $content . str_repeat(' ', $right); + break; + } + } + + /** + * handle <tr> tags + * + * @param void + * @return void + */ + protected function handleTag_tr() + { + if ($this->parser->isStartTag) { + $this->col = -1; + } else { + $this->row++; + } + } + + /** + * handle <td> tags + * + * @param void + * @return void + */ + protected function handleTag_td() + { + if ($this->parser->isStartTag) { + $this->col++; + if (!isset($this->table['col_widths'][$this->col])) { + $this->table['col_widths'][$this->col] = 0; + } + $this->buffer(); + } else { + $buffer = trim($this->unbuffer()); + if (!isset($this->table['col_widths'][$this->col])) { + $this->table['col_widths'][$this->col] = 0; + } + $this->table['col_widths'][$this->col] = max($this->table['col_widths'][$this->col], $this->strlen($buffer)); + $this->table['rows'][$this->row][$this->col] = $buffer; + } + } + + /** + * handle <th> tags + * + * @param void + * @return void + */ + protected function handleTag_th() + { + if (!$this->keepHTML && !isset($this->table['rows'][1]) && !isset($this->table['aligns'][$this->col + 1])) { + if (isset($this->parser->tagAttributes['align'])) { + $this->table['aligns'][$this->col + 1] = $this->parser->tagAttributes['align']; + } else { + $this->table['aligns'][$this->col + 1] = ''; + } + } + $this->handleTag_td(); + } + + /** + * handle <dl> tags + * + * @param void + * @return void + */ + protected function handleTag_dl() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(2); + } + } + + /** + * handle <dt> tags + * + * @param void + * @return void + **/ + protected function handleTag_dt() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(1); + } + } + + /** + * handle <dd> tags + * + * @param void + * @return void + */ + protected function handleTag_dd() + { + if ($this->parser->isStartTag) { + if (substr(ltrim($this->parser->html), 0, 3) == '<p>') { + // next comes a paragraph, so we'll need an extra line + $this->out("\n" . $this->indent); + } elseif (substr($this->output, -2) == "\n\n") { + $this->output = substr($this->output, 0, -1); + } + $this->out(': '); + $this->indent(' ', false); + } else { + // lookahead for next dt + if (substr(ltrim($this->parser->html), 0, 4) == '<dt>') { + $this->setLineBreaks(2); + } else { + $this->setLineBreaks(1); + } + $this->indent(' '); + } + } + + /** + * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString()) + * + * @param void + * @return void + */ + protected function handleTag_fnref() + { + $this->out('[^' . $this->parser->tagAttributes['target'] . ']'); + } + + /** + * handle <fn> tags (custom footnotes, see markdownify_extra::parseString() + * and markdownify_extra::_makeFootnotes()) + * + * @param void + * @return void + */ + protected function handleTag_fn() + { + if ($this->parser->isStartTag) { + $this->out('[^' . $this->parser->tagAttributes['name'] . ']:'); + $this->setLineBreaks(1); + } else { + $this->setLineBreaks(2); + } + $this->indent(' '); + } + + /** + * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString() + * and markdownify_extra::_makeFootnotes()) + * + * @param void + * @return void + */ + protected function handleTag_footnotes() + { + if (!$this->parser->isStartTag) { + $this->setLineBreaks(2); + } + } + + /** + * parse a HTML string, clean up footnotes prior + * + * @param string $HTML input + * @return string Markdown formatted output + */ + public function parseString($html) + { + /** TODO: custom markdown-extra options, e.g. titles & classes **/ + // <sup id="fnref:..."><a href"#fn..." rel="footnote">...</a></sup> + // => <fnref target="..." /> + $html = preg_replace('@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us', '<fnref target="$1" />', $html); + // <div class="footnotes"> + // <hr /> + // <ol> + // + // <li id="fn:...">...</li> + // ... + // + // </ol> + // </div> + // => + // <footnotes> + // <fn name="...">...</fn> + // ... + // </footnotes> + $html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us', array(&$this, '_makeFootnotes'), $html); + + return parent::parseString($html); + } + + /** + * replace HTML representation of footnotes with something more easily parsable + * + * @note this is a callback to be used in parseString() + * + * @param array $matches + * @return string + */ + protected function _makeFootnotes($matches) + { + // <li id="fn:1"> + // ... + // <a href="#fnref:block" rev="footnote">↩</a></p> + // </li> + // => <fn name="1">...</fn> + // remove footnote link + $fns = preg_replace('@\s*( \s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>↩</a>\s*@s', '', $matches[1]); + // remove empty paragraph + $fns = preg_replace('@<p>\s*</p>@s', '', $fns); + // <li id="fn:1">...</li> -> <footnote nr="1">...</footnote> + $fns = str_replace('<li id="fn:', '<fn name="', $fns); + + $fns = '<footnotes>' . $fns . '</footnotes>'; + + return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>$1', $fns); + } + + /** + * handle <a> tags parsing + * + * @param void + * @return void + */ + protected function getCurrentCssSelector() + { + $cssSelector = ''; + if (isset($this->parser->tagAttributes['id'])) { + $cssSelector .= '#' . $this->decode($this->parser->tagAttributes['id']); + } + if (isset($this->parser->tagAttributes['class'])) { + $classes = explode(' ', $this->decode($this->parser->tagAttributes['class'])); + $classes = array_filter($classes); + $cssSelector .= '.' . join('.', $classes); + } + return $cssSelector; + } +} |