diff options
Diffstat (limited to 'include/markdownify/markdownify_extra.php')
-rw-r--r-- | include/markdownify/markdownify_extra.php | 489 |
1 files changed, 0 insertions, 489 deletions
<?php
/**
 * Class to convert HTML to Markdown with PHP Markdown Extra syntax support.
 *
 * @version 1.0.0 alpha
 * @author Milian Wolff (<mail@milianw.de>, <http://milianw.de>)
 * @license LGPL, see LICENSE_LGPL.txt and the summary below
 * @copyright (C) 2007 Milian Wolff
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * standard Markdownify class
 */
require_once dirname(__FILE__).'/markdownify.php';

/**
 * HTML to Markdown converter that adds PHP Markdown Extra constructs on top of
 * Markdownify: header ids, tables, definition lists, footnotes and
 * abbreviations.
 *
 * NOTE(review): this file was recovered from a whitespace-collapsed listing.
 * String literals that originally contained runs of several spaces (indent
 * strings, the definition-list marker, table padding) may have been reduced
 * to a single space - verify them against the upstream source.
 */
class Markdownify_Extra extends Markdownify {
    /**
     * table data, including rows with content and the maximum width of each col
     *
     * @var array
     */
    public $table = array();
    /**
     * current col
     *
     * @var int
     */
    public $col = -1;
    /**
     * current row
     *
     * @var int
     */
    public $row = 0;
    /**
     * RegEx matching the header row of a table that can be converted
     *
     * Declared explicitly; it used to be created dynamically in the
     * constructor, which is deprecated since PHP 8.2.
     *
     * @var string
     */
    public $tableLookaheadHeader = '';
    /**
     * RegEx fragment matching the contents and closing tag of one <td>
     *
     * @var string
     */
    public $tdSubstitute = '';
    /**
     * sprintf() template of the RegEx matching the body rows of a table;
     * the single %s receives one <td> sub-pattern per column
     *
     * @var string
     */
    public $tableLookaheadBody = '';

    /**
     * constructor, see Markdownify::Markdownify() for more information
     *
     * PHP 8 no longer treats a method named like its class as a constructor,
     * so the set-up lives here and Markdownify_Extra() merely forwards to it.
     *
     * @param mixed $linksAfterEachParagraph passed through to Markdownify
     * @param mixed $bodyWidth passed through to Markdownify
     * @param mixed $keepHTML passed through to Markdownify
     */
    public function __construct($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
        # the parent may or may not have been migrated to __construct() yet
        if (method_exists('Markdownify', '__construct')) {
            parent::__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML);
        } else {
            parent::Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);
        }

        ### new markdownable tags & attributes
        # header ids: # foo {bar}
        for ($level = 1; $level <= 6; $level++) {
            $this->isMarkdownable['h'.$level]['id'] = 'optional';
        }
        # tables
        $this->isMarkdownable['table'] = array();
        $this->isMarkdownable['th'] = array(
            'align' => 'optional',
        );
        $this->isMarkdownable['td'] = array(
            'align' => 'optional',
        );
        $this->isMarkdownable['tr'] = array();
        array_push($this->ignore, 'thead', 'tbody', 'tfoot');
        # definition lists
        $this->isMarkdownable['dl'] = array();
        $this->isMarkdownable['dd'] = array();
        $this->isMarkdownable['dt'] = array();
        # footnotes
        $this->isMarkdownable['fnref'] = array(
            'target' => 'required',
        );
        $this->isMarkdownable['footnotes'] = array();
        $this->isMarkdownable['fn'] = array(
            'name' => 'required',
        );
        $this->parser->blockElements['fnref'] = false;
        $this->parser->blockElements['fn'] = true;
        $this->parser->blockElements['footnotes'] = true;
        # abbr
        $this->isMarkdownable['abbr'] = array(
            'title' => 'required',
        );
        # build RegEx lookahead to decide whether a table can be parsed or not;
        # a cell may only contain text and the tags flagged as non-block
        $inlineTags = array_keys($this->parser->blockElements, false);
        $colContents = '(?:[^<]|<(?:'.implode('|', $inlineTags).'|[^a-z]))+';
        # the patterns below use the x modifier: their "#" comments end at the
        # newline, so the line breaks inside these strings are significant
        $this->tableLookaheadHeader = '{
            ^\s*(?:<thead\s*>)?\s*                                  # open optional thead
              <tr\s*>\s*(?:                                         # start required row with headers
                <th(?:\s+align=("|\')(?:left|center|right)\1)?\s*>  # header with optional align
                \s*'.$colContents.'\s*                              # contents
                </th>\s*                                            # close header
              )+</tr>                                               # close row with headers
            \s*(?:</thead>)?                                        # close optional thead
          }sxi';
        $this->tdSubstitute = '\s*'.$colContents.'\s*               # contents
                </td>\s*';
        $this->tableLookaheadBody = '{
            \s*(?:<tbody\s*>)?\s*                                   # open optional tbody
              (?:<tr\s*>\s*                                         # start row
                %s                                                  # cols to be substituted
              </tr>)+                                               # close row
            \s*(?:</tbody>)?                                        # close optional tbody
            \s*</table>                                             # close table
          }sxi';
    }
    /**
     * legacy PHP 4 style constructor name, kept so that existing code calling
     * it explicitly keeps working; forwards to __construct()
     *
     * @param mixed $linksAfterEachParagraph
     * @param mixed $bodyWidth
     * @param mixed $keepHTML
     * @return void
     */
    public function Markdownify_Extra($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
        $this->__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML);
    }
    /**
     * handle header tags (<h1> - <h6>)
     *
     * Remembers the id attribute of the opening tag and emits it as
     * " {#id}" when the header is closed, then defers to the parent.
     *
     * @param int $level 1-6
     * @return void
     */
    public function handleHeader($level) {
        # id of the header currently open; static, hence shared by all calls
        static $id = null;
        if ($this->parser->isStartTag) {
            if (isset($this->parser->tagAttributes['id'])) {
                $id = $this->parser->tagAttributes['id'];
            }
        } elseif (!is_null($id)) {
            $this->out(' {#'.$id.'}');
            $id = null;
        }
        parent::handleHeader($level);
    }
    /**
     * handle <abbr> tags
     *
     * The abbreviated text is written inline; its definition is kept on the
     * 'abbr' stack (once per distinct text) until flushStacked_abbr() runs.
     *
     * @return void
     */
    public function handleTag_abbr() {
        if ($this->parser->isStartTag) {
            $this->stack();
            $this->buffer();
            return;
        }
        $tag = $this->unstack();
        $tag['text'] = $this->unbuffer();
        $add = true;
        foreach ($this->stack['abbr'] as $stacked) {
            if ($stacked['text'] == $tag['text']) {
                /** TODO: differing abbr definitions, i.e. different titles for same text **/
                $add = false;
                break;
            }
        }
        $this->out($tag['text']);
        if ($add) {
            array_push($this->stack['abbr'], $tag);
        }
    }
    /**
     * flush stacked abbr tags
     *
     * Emits a "*[text]: title" line for every stacked abbreviation that has
     * not been written yet and marks it as done.
     *
     * @return void
     */
    public function flushStacked_abbr() {
        # nothing to do when no abbreviation has been stacked at all
        if (empty($this->stack['abbr'])) {
            return;
        }
        $out = array();
        foreach ($this->stack['abbr'] as $k => $tag) {
            if (!isset($tag['unstacked'])) {
                # NOTE(review): leading whitespace of this literal may have been collapsed
                array_push($out, ' *['.$tag['text'].']: '.$tag['title']);
                $tag['unstacked'] = true;
                $this->stack['abbr'][$k] = $tag;
            }
        }
        if (!empty($out)) {
            $this->out("\n\n".implode("\n", $out));
        }
    }
    /**
     * handle <table> tags
     *
     * Opening tag: with keepHTML the upcoming HTML is checked against the
     * lookahead patterns and the table is passed through as text when it
     * does not fit; otherwise collecting of rows starts.
     * Closing tag: the collected rows are written in Markdown Extra syntax.
     *
     * @return void
     */
    public function handleTag_table() {
        if ($this->parser->isStartTag) {
            if (!$this->keepHTML) {
                # without keepHTML every table is converted; aligns are
                # gathered by handleTag_th()
                $this->table = array(
                    'rows' => array(),
                    'col_widths' => array(),
                    'aligns' => array(),
                );
                $this->row = 0;
                return;
            }
            # check if upcoming table can be converted
            if (!preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
                # non markdownable table
                $this->handleTagToText();
                return;
            }
            # header seems good, now check body
            $headerLength = strlen($matches[0]);
            # get align & number of cols
            preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
            $regEx = '';
            $i = 1; # number of the capturing group holding the quote char
            $aligns = array();
            foreach ($cols[2] as $align) {
                $align = strtolower($align);
                array_push($aligns, $align);
                if (empty($align)) {
                    $align = 'left'; # default value
                }
                $td = '\s+align=("|\')'.$align.'\\'.$i;
                $i++;
                if ($align == 'left') {
                    # look for empty align or left
                    $td = '(?:'.$td.')?';
                }
                $td = '<td'.$td.'\s*>';
                $regEx .= $td.$this->tdSubstitute;
            }
            $regEx = sprintf($this->tableLookaheadBody, $regEx);
            # flags must be an int: passing null is deprecated since PHP 8.1
            if (preg_match($regEx, $this->parser->html, $matches, 0, $headerLength)) {
                # this is a markdownable table tag!
                $this->table = array(
                    'rows' => array(),
                    'col_widths' => array(),
                    'aligns' => $aligns,
                );
                $this->row = 0;
            } else {
                # non markdownable table
                $this->handleTagToText();
            }
            return;
        }

        # closing tag: finally build the table in Markdown Extra syntax;
        # without any collected row there is no header to write (and
        # implode() would be handed null)
        if (!empty($this->table['rows'])) {
            $separator = array();
            # separator with correct align identifiers
            $aligns = isset($this->table['aligns']) ? $this->table['aligns'] : array();
            foreach ($aligns as $col => $align) {
                if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
                    break;
                }
                $left = ' ';
                $right = ' ';
                switch ($align) {
                    case 'left':
                        $left = ':';
                        break;
                    case 'center':
                        $right = ':';
                        $left = ':';
                        break;
                    case 'right':
                        $right = ':';
                        break;
                }
                array_push($separator, $left.str_repeat('-', $this->table['col_widths'][$col]).$right);
            }
            $separator = '|'.implode('|', $separator).'|';

            $rows = array();
            # add padding
            array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));
            $header = array_shift($this->table['rows']);
            array_push($rows, '| '.implode(' | ', $header).' |');
            array_push($rows, $separator);
            foreach ($this->table['rows'] as $row) {
                array_push($rows, '| '.implode(' | ', $row).' |');
            }
            $this->out(implode("\n".$this->indent, $rows));
        }
        $this->table = array();
        $this->setLineBreaks(2);
    }
    /**
     * properly pad content so it is aligned as wished
     * should be used with array_walk_recursive on $this->table['rows']
     *
     * @param string &$content cell text, padded in place
     * @param int $col column index of the cell
     * @return void
     */
    public function alignTdContent(&$content, $col) {
        # columns without a recorded alignment are padded like left aligned ones
        $align = isset($this->table['aligns'][$col]) ? $this->table['aligns'][$col] : 'left';
        $paddingNeeded = $this->table['col_widths'][$col] - $this->strlen($content);
        switch ($align) {
            case 'right':
                $content = str_repeat(' ', $paddingNeeded).$content;
                break;
            case 'center':
                # an odd remainder goes to the right side
                $left = (int) floor($paddingNeeded / 2);
                $right = $paddingNeeded - $left;
                $content = str_repeat(' ', $left).$content.str_repeat(' ', $right);
                break;
            case 'left':
            default:
                $content .= str_repeat(' ', $paddingNeeded);
                break;
        }
    }
    /**
     * handle <tr> tags
     *
     * Resets the column counter when a row opens and advances the row
     * counter when it closes.
     *
     * @return void
     */
    public function handleTag_tr() {
        if ($this->parser->isStartTag) {
            $this->col = -1;
        } else {
            $this->row++;
        }
    }
    /**
     * handle <td> tags
     *
     * Buffers the cell, then stores its trimmed text and widens the column
     * if this cell is the longest one seen so far.
     *
     * @return void
     */
    public function handleTag_td() {
        if ($this->parser->isStartTag) {
            $this->col++;
            if (!isset($this->table['col_widths'][$this->col])) {
                $this->table['col_widths'][$this->col] = 0;
            }
            $this->buffer();
            return;
        }
        $buffer = trim($this->unbuffer());
        $this->table['col_widths'][$this->col] = max($this->table['col_widths'][$this->col], $this->strlen($buffer));
        $this->table['rows'][$this->row][$this->col] = $buffer;
    }
    /**
     * handle <th> tags
     *
     * Without keepHTML the alignment of each column is taken from the header
     * cells of the first row; afterwards the cell is handled like a <td>.
     *
     * @return void
     */
    public function handleTag_th() {
        # handleTag_td() increments the column afterwards, hence col+1.
        # Only the opening tag carries the align attribute; recording on the
        # closing tag as well pre-filled the slot of the following column
        # and made its own align attribute be ignored.
        $next = $this->col + 1;
        if ($this->parser->isStartTag && !$this->keepHTML && !isset($this->table['rows'][1]) && !isset($this->table['aligns'][$next])) {
            if (isset($this->parser->tagAttributes['align'])) {
                $this->table['aligns'][$next] = strtolower($this->parser->tagAttributes['align']);
            } else {
                $this->table['aligns'][$next] = '';
            }
        }
        $this->handleTag_td();
    }
    /**
     * handle <dl> tags
     *
     * @return void
     */
    public function handleTag_dl() {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(2);
        }
    }
    /**
     * handle <dt> tags
     *
     * @return void
     */
    public function handleTag_dt() {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(1);
        }
    }
    /**
     * handle <dd> tags
     *
     * @return void
     */
    public function handleTag_dd() {
        if ($this->parser->isStartTag) {
            if (substr(ltrim($this->parser->html), 0, 3) === '<p>') {
                # next comes a paragraph, so we'll need an extra line
                $this->out("\n".$this->indent);
            } elseif (substr($this->output, -2) === "\n\n") {
                # drop one of the two trailing newlines
                $this->output = substr($this->output, 0, -1);
            }
            # NOTE(review): marker and indent widths may have been collapsed - verify
            $this->out(': ');
            $this->indent(' ', false);
            return;
        }
        # lookahead for next dt
        if (substr(ltrim($this->parser->html), 0, 4) === '<dt>') {
            $this->setLineBreaks(2);
        } else {
            $this->setLineBreaks(1);
        }
        # NOTE(review): indent width may have been collapsed - verify
        $this->indent(' ');
    }
    /**
     * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
     *
     * @return void
     */
    public function handleTag_fnref() {
        $this->out('[^'.$this->parser->tagAttributes['target'].']');
    }
    /**
     * handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
     * and markdownify_extra::_makeFootnotes())
     *
     * @return void
     */
    public function handleTag_fn() {
        if ($this->parser->isStartTag) {
            $this->out('[^'.$this->parser->tagAttributes['name'].']:');
            $this->setLineBreaks(1);
        } else {
            $this->setLineBreaks(2);
        }
        # called for the opening and the closing tag alike
        # NOTE(review): indent width may have been collapsed - verify
        $this->indent(' ');
    }
    /**
     * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
     * and markdownify_extra::_makeFootnotes())
     *
     * @return void
     */
    public function handleTag_footnotes() {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(2);
        }
    }
    /**
     * parse a HTML string, clean up footnotes prior
     *
     * Footnote references and the footnote list are rewritten to the custom
     * <fnref>, <footnotes> and <fn> tags before the parent does the parsing.
     *
     * @param string $html input
     * @return string Markdown formatted output
     */
    public function parseString($html) {
        /** TODO: custom markdown-extra options, e.g. titles & classes **/
        # <sup id="fnref:..."><a href="#fn..." rel="footnote">...</a></sup>
        # => <fnref target="..." />
        $replaced = preg_replace('@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us', '<fnref target="$1" />', $html);
        # preg_replace() yields null on a PCRE error; keep the input then
        if ($replaced !== null) {
            $html = $replaced;
        }
        # <div class="footnotes">
        # <hr />
        # <ol>
        #
        # <li id="fn:...">...</li>
        # ...
        #
        # </ol>
        # </div>
        # =>
        # <footnotes>
        #   <fn name="...">...</fn>
        #   ...
        # </footnotes>
        $replaced = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us', array($this, '_makeFootnotes'), $html);
        if ($replaced !== null) {
            $html = $replaced;
        }
        return parent::parseString($html);
    }
    /**
     * replace HTML representation of footnotes with something more easily parsable
     *
     * @note this is a callback to be used in parseString()
     *
     * @param array $matches match of the footnotes block, [1] holds the <li> items
     * @return string the items wrapped as <footnotes><fn name="...">...</fn>...</footnotes>
     */
    public function _makeFootnotes($matches) {
        # <li id="fn:1">
        # ...
        # <a href="#fnref:block" rev="footnote">↩</a></p>
        # </li>
        # => <fn name="1">...</fn>
        # remove footnote link; the non-breaking space and the arrow are
        # accepted both as entities and as literal (UTF-8) characters
        # NOTE(review): presumably the upstream pattern used the entities - confirm
        $fns = preg_replace('@\s*((?:&#160;|&nbsp;|\xC2\xA0|[ ])\s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>(?:&#8617;|↩)</a>\s*@s', '', $matches[1]);
        # remove empty paragraph
        $fns = preg_replace('@<p>\s*</p>@s', '', $fns);
        # <li id="fn:1">...</li> -> <fn name="1">...</fn>
        $fns = str_replace('<li id="fn:', '<fn name="', $fns);

        $fns = '<footnotes>'.$fns.'</footnotes>';
        # the lookahead captures nothing, so the replacement is just the closing tag
        return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>', $fns);
    }
}
\ No newline at end of file |