From 6c79e0c077971029343b2dff30017571ea118438 Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Thu, 2 Mar 2017 23:25:04 +0100 Subject: :arrow_up: :hammer: Upgrade Markdownify library. The current version 2.0.0 (alpha) throws deprecated warning with PHP7.1 and PHPUnit. Upgrade the HTML to Markdown converter for PHP to the current Markdownify 2.2.1. Used composer to manage this library. --- library/markdownify/markdownify.php | 1197 ----------------------------------- 1 file changed, 1197 deletions(-) delete mode 100644 library/markdownify/markdownify.php (limited to 'library/markdownify/markdownify.php') diff --git a/library/markdownify/markdownify.php b/library/markdownify/markdownify.php deleted file mode 100644 index 0d4429a01..000000000 --- a/library/markdownify/markdownify.php +++ /dev/null @@ -1,1197 +0,0 @@ -, ) - * @license LGPL, see LICENSE_LGPL.txt and the summary below - * @copyright (C) 2007 Milian Wolff - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/** - * HTML Parser, see http://sf.net/projects/parseHTML - */ -require_once dirname(__FILE__).'/parsehtml/parsehtml.php'; - -/** - * default configuration - */ -define('MDFY_LINKS_EACH_PARAGRAPH', false); -define('MDFY_BODYWIDTH', false); -define('MDFY_KEEPHTML', true); - -/** - * HTML to Markdown converter class - */ -class Markdownify { - /** - * html parser object - * - * @var parseHTML - */ - var $parser; - /** - * markdown output - * - * @var string - */ - var $output; - /** - * stack with tags which where not converted to html - * - * @var array - */ - var $notConverted = array(); - /** - * skip conversion to markdown - * - * @var bool - */ - var $skipConversion = false; - /* options */ - /** - * keep html tags which cannot be converted to markdown - * - * @var bool - */ - var $keepHTML = false; - /** - * wrap output, set to 0 to skip wrapping - * - * @var int - */ - var $bodyWidth = 0; - /** - * minimum body width - * - * @var int - */ - var $minBodyWidth = 25; - /** - * display links after each paragraph - * - * @var bool - */ - var $linksAfterEachParagraph = false; - /** - * constructor, set options, setup parser - * - * @param bool $linksAfterEachParagraph wether or not to flush stacked links after each paragraph - * defaults to false - * @param int $bodyWidth wether or not to wrap the output to the given width - * defaults to false - * @param bool $keepHTML wether to keep non markdownable HTML or to discard it - * defaults to true (HTML will be kept) - * @return void - */ - function Markdownify($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) { - $this->linksAfterEachParagraph = $linksAfterEachParagraph; - $this->keepHTML = $keepHTML; - - if ($bodyWidth > 
$this->minBodyWidth) { - $this->bodyWidth = intval($bodyWidth); - } else { - $this->bodyWidth = false; - } - - $this->parser = new parseHTML; - $this->parser->noTagsInCode = true; - - # we don't have to do this every time - $search = array(); - $replace = array(); - foreach ($this->escapeInText as $s => $r) { - array_push($search, '#(?escapeInText = array( - 'search' => $search, - 'replace' => $replace - ); - } - /** - * parse a HTML string - * - * @param string $html - * @return string markdown formatted - */ - function parseString($html) { - $this->parser->html = $html; - $this->parse(); - return $this->output; - } - /** - * tags with elements which can be handled by markdown - * - * @var array - */ - var $isMarkdownable = array( - 'p' => array(), - 'ul' => array(), - 'ol' => array(), - 'li' => array(), - 'br' => array(), - 'blockquote' => array(), - 'code' => array(), - 'pre' => array(), - 'a' => array( - 'href' => 'required', - 'title' => 'optional', - ), - 'strong' => array(), - 'b' => array(), - 'em' => array(), - 'i' => array(), - 'img' => array( - 'src' => 'required', - 'alt' => 'optional', - 'title' => 'optional', - ), - 'h1' => array(), - 'h2' => array(), - 'h3' => array(), - 'h4' => array(), - 'h5' => array(), - 'h6' => array(), - 'hr' => array(), - ); - /** - * html tags to be ignored (contents will be parsed) - * - * @var array - */ - var $ignore = array( - 'html', - 'body', - ); - /** - * html tags to be dropped (contents will not be parsed!) - * - * @var array - */ - var $drop = array( - 'script', - 'head', - 'style', - 'form', - 'area', - 'object', - 'param', - 'iframe', - ); - /** - * Markdown indents which could be wrapped - * @note: use strings in regex format - * - * @var array - */ - var $wrappableIndents = array( - '\* ', # ul - '\d. ', # ol - '\d\d. 
', # ol - '> ', # blockquote - '', # p - ); - /** - * list of chars which have to be escaped in normal text - * @note: use strings in regex format - * - * @var array - * - * TODO: what's with block chars / sequences at the beginning of a block? - */ - var $escapeInText = array( - '([-*_])([ ]{0,2}\1){2,}' => '\\\\$0|', # hr - '\*\*([^*\s]+)\*\*' => '\*\*$1\*\*', # strong - '\*([^*\s]+)\*' => '\*$1\*', # em - '__(?! |_)(.+)(?!<_| )__' => '\_\_$1\_\_', # em - '_(?! |_)(.+)(?!<_| )_' => '\_$1\_', # em - '`(.+)`' => '\`$1\`', # code - '\[(.+)\](\s*\()' => '\[$1\]$2', # links: [text] (url) => [text\] (url) - '\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]', # links: [text][id] => [text\][id\] - ); - /** - * wether last processed node was a block tag or not - * - * @var bool - */ - var $lastWasBlockTag = false; - /** - * name of last closed tag - * - * @var string - */ - var $lastClosedTag = ''; - /** - * iterate through the nodes and decide what we - * shall do with the current node - * - * @param void - * @return void - */ - function parse() { - $this->output = ''; - # drop tags - $this->parser->html = preg_replace('#<('.implode('|', $this->drop).')[^>]*>.*#sU', '', $this->parser->html); - while ($this->parser->nextNode()) { - switch ($this->parser->nodeType) { - case 'doctype': - break; - case 'pi': - case 'comment': - if ($this->keepHTML) { - $this->flushLinebreaks(); - $this->out($this->parser->node); - $this->setLineBreaks(2); - } - # else drop - break; - case 'text': - $this->handleText(); - break; - case 'tag': - if (in_array($this->parser->tagName, $this->ignore)) { - break; - } - if ($this->parser->isStartTag) { - $this->flushLinebreaks(); - } - if ($this->skipConversion) { - $this->isMarkdownable(); # update notConverted - $this->handleTagToText(); - continue; - } - if (!$this->parser->keepWhitespace && $this->parser->isBlockElement && $this->parser->isStartTag) { - $this->parser->html = ltrim($this->parser->html); - } - if ($this->isMarkdownable()) { - if 
($this->parser->isBlockElement && $this->parser->isStartTag && !$this->lastWasBlockTag && !empty($this->output)) { - if (!empty($this->buffer)) { - $str =& $this->buffer[count($this->buffer) -1]; - } else { - $str =& $this->output; - } - if (substr($str, -strlen($this->indent)-1) != "\n".$this->indent) { - $str .= "\n".$this->indent; - } - } - $func = 'handleTag_'.$this->parser->tagName; - $this->$func(); - if ($this->linksAfterEachParagraph && $this->parser->isBlockElement && !$this->parser->isStartTag && empty($this->parser->openTags)) { - $this->flushStacked(); - } - if (!$this->parser->isStartTag) { - $this->lastClosedTag = $this->parser->tagName; - } - } else { - $this->handleTagToText(); - $this->lastClosedTag = ''; - } - break; - default: - trigger_error('invalid node type', E_USER_ERROR); - break; - } - $this->lastWasBlockTag = $this->parser->nodeType == 'tag' && $this->parser->isStartTag && $this->parser->isBlockElement; - } - if (!empty($this->buffer)) { - trigger_error('buffer was not flushed, this is a bug. 
please report!', E_USER_WARNING); - while (!empty($this->buffer)) { - $this->out($this->unbuffer()); - } - } - ### cleanup - $this->output = rtrim(str_replace('&', '&', str_replace('<', '<', str_replace('>', '>', $this->output)))); - # end parsing, flush stacked tags - $this->flushStacked(); - $this->stack = array(); - } - /** - * check if current tag can be converted to Markdown - * - * @param void - * @return bool - */ - function isMarkdownable() { - if (!isset($this->isMarkdownable[$this->parser->tagName])) { - # simply not markdownable - return false; - } - if ($this->parser->isStartTag) { - $return = true; - if ($this->keepHTML) { - $diff = array_diff(array_keys($this->parser->tagAttributes), array_keys($this->isMarkdownable[$this->parser->tagName])); - if (!empty($diff)) { - # non markdownable attributes given - $return = false; - } - } - if ($return) { - foreach ($this->isMarkdownable[$this->parser->tagName] as $attr => $type) { - if ($type == 'required' && !isset($this->parser->tagAttributes[$attr])) { - # required markdown attribute not given - $return = false; - break; - } - } - } - if (!$return) { - array_push($this->notConverted, $this->parser->tagName.'::'.implode('/', $this->parser->openTags)); - } - return $return; - } else { - if (!empty($this->notConverted) && end($this->notConverted) === $this->parser->tagName.'::'.implode('/', $this->parser->openTags)) { - array_pop($this->notConverted); - return false; - } - return true; - } - } - /** - * output all stacked tags - * - * @param void - * @return void - */ - function flushStacked() { - # links - foreach ($this->stack as $tag => $a) { - if (!empty($a)) { - call_user_func(array(&$this, 'flushStacked_'.$tag)); - } - } - } - /** - * output link references (e.g. 
[1]: http://example.com "title"); - * - * @param void - * @return void - */ - function flushStacked_a() { - $out = false; - foreach ($this->stack['a'] as $k => $tag) { - if (!isset($tag['unstacked'])) { - if (!$out) { - $out = true; - $this->out("\n\n", true); - } else { - $this->out("\n", true); - } - $this->out(' ['.$tag['linkID'].']: '.$tag['href'].(isset($tag['title']) ? ' "'.$tag['title'].'"' : ''), true); - $tag['unstacked'] = true; - $this->stack['a'][$k] = $tag; - } - } - } - /** - * flush enqued linebreaks - * - * @param void - * @return void - */ - function flushLinebreaks() { - if ($this->lineBreaks && !empty($this->output)) { - $this->out(str_repeat("\n".$this->indent, $this->lineBreaks), true); - } - $this->lineBreaks = 0; - } - /** - * handle non Markdownable tags - * - * @param void - * @return void - */ - function handleTagToText() { - if (!$this->keepHTML) { - if (!$this->parser->isStartTag && $this->parser->isBlockElement) { - $this->setLineBreaks(2); - } - } else { - # dont convert to markdown inside this tag - /** TODO: markdown extra **/ - if (!$this->parser->isEmptyTag) { - if ($this->parser->isStartTag) { - if (!$this->skipConversion) { - $this->skipConversion = $this->parser->tagName.'::'.implode('/', $this->parser->openTags); - } - } else { - if ($this->skipConversion == $this->parser->tagName.'::'.implode('/', $this->parser->openTags)) { - $this->skipConversion = false; - } - } - } - - if ($this->parser->isBlockElement) { - if ($this->parser->isStartTag) { - if (in_array($this->parent(), array('ins', 'del'))) { - # looks like ins or del are block elements now - $this->out("\n", true); - $this->indent(' '); - } - if ($this->parser->tagName != 'pre') { - $this->out($this->parser->node."\n".$this->indent); - if (!$this->parser->isEmptyTag) { - $this->indent(' '); - } else { - $this->setLineBreaks(1); - } - $this->parser->html = ltrim($this->parser->html); - } else { - # don't indent inside
 tags
-            $this->out($this->parser->node);
-            static $indent;
-            $indent =  $this->indent;
-            $this->indent = '';
-          }
-        } else {
-          if (!$this->parser->keepWhitespace) {
-            $this->output = rtrim($this->output);
-          }
-          if ($this->parser->tagName != 'pre') {
-            $this->indent('  ');
-            $this->out("\n".$this->indent.$this->parser->node);
-          } else {
-            # reset indentation
-            $this->out($this->parser->node);
-            static $indent;
-            $this->indent = $indent;
-          }
-
-          if (in_array($this->parent(), array('ins', 'del'))) {
-            # ins or del was block element
-            $this->out("\n");
-            $this->indent('  ');
-          }
-          if ($this->parser->tagName == 'li') {
-            $this->setLineBreaks(1);
-          } else {
-            $this->setLineBreaks(2);
-          }
-        }
-      } else {
-        $this->out($this->parser->node);
-      }
-      if (in_array($this->parser->tagName, array('code', 'pre'))) {
-        if ($this->parser->isStartTag) {
-          $this->buffer();
-        } else {
-          # add stuff so cleanup just reverses this
-          $this->out(str_replace('<', '&lt;', str_replace('>', '&gt;', $this->unbuffer())));
-        }
-      }
-    }
-  }
-  /**
-   * handle plain text
-   *
-   * Inside <pre> the current indent is re-applied after every newline of the
-   * text node. Outside <code>/<pre> the node is passed through decode() and,
-   * unless conversion is currently skipped, through the escapeInText
-   * search/replace patterns. Inside <code>/<pre> only the quote entities are
-   * turned back into literal quotes. Finally the node is written with out()
-   * and lastClosedTag is reset to an empty string.
-   *
-   * @param void
-   * @return void
-   */
-  function handleText() {
-    if ($this->hasParent('pre') && strpos($this->parser->node, "\n") !== false) {
-      # keep continuation lines of preformatted text on the current indent
-      $this->parser->node = str_replace("\n", "\n".$this->indent, $this->parser->node);
-    }
-    if (!$this->hasParent('code') && !$this->hasParent('pre')) {
-      # entity decode
-      $this->parser->node = $this->decode($this->parser->node);
-      if (!$this->skipConversion) {
-        # escape some chars in normal Text
-        $this->parser->node = preg_replace($this->escapeInText['search'], $this->escapeInText['replace'], $this->parser->node);
-      }
-    } else {
-      # Only quote entities are decoded inside code. '&apos;' is listed before
-      # the bare '&apos' so that the trailing semicolon is consumed as well;
-      # the bare form is kept so input without the semicolon still converts.
-      $this->parser->node = str_replace(
-        array('&quot;', '&apos;', '&apos'),
-        array('"', '\'', '\''),
-        $this->parser->node
-      );
-    }
-    $this->out($this->parser->node);
-    $this->lastClosedTag = '';
-  }
-  /**
-   * handle <em> and <i> tags
-   *
-   * Writes one asterisk through out(); the same marker is written for the
-   * start tag and the end tag because the tag direction is not inspected.
-   *
-   * @param void
-   * @return void
-   */
-  function handleTag_em() {
-    $emphasisMarker = '*';
-    $this->out($emphasisMarker, true);
-  }
-  /**
-   * handle <i> tags by delegating to the <em> handler
-   *
-   * @param void
-   * @return void
-   */
-  function handleTag_i() {
-    # <i> produces exactly the output of <em>
-    return $this->handleTag_em();
-  }
-  /**
-   * handle <strong> and <b> tags
-   *
-   * Writes two asterisks through out(); the same marker is written for the
-   * start tag and the end tag because the tag direction is not inspected.
-   *
-   * @param void
-   * @return void
-   */
-  function handleTag_strong() {
-    $strongMarker = '**';
-    $this->out($strongMarker, true);
-  }
-  /**
-   * handle <b> tags by delegating to the <strong> handler
-   *
-   * @param void
-   * @return void
-   */
-  function handleTag_b() {
-    # <b> produces exactly the output of <strong>
-    return $this->handleTag_strong();
-  }
-  /**
-   * handle 

tags - * - * @param void - * @return void - */ - function handleTag_h1() { - $this->handleHeader(1); - } - /** - * handle

tags - * - * @param void - * @return void - */ - function handleTag_h2() { - $this->handleHeader(2); - } - /** - * handle

tags - * - * @param void - * @return void - */ - function handleTag_h3() { - $this->handleHeader(3); - } - /** - * handle

tags - * - * @param void - * @return void - */ - function handleTag_h4() { - $this->handleHeader(4); - } - /** - * handle

tags - * - * @param void - * @return void - */ - function handleTag_h5() { - $this->handleHeader(5); - } - /** - * handle
tags - * - * @param void - * @return void - */ - function handleTag_h6() { - $this->handleHeader(6); - } - /** - * number of line breaks before next inline output - */ - var $lineBreaks = 0; - /** - * handle header tags (

-

) - * - * @param int $level 1-6 - * @return void - */ - function handleHeader($level) { - if ($this->parser->isStartTag) { - $this->out(str_repeat('#', $level).' ', true); - } else { - $this->setLineBreaks(2); - } - } - /** - * handle

tags - * - * @param void - * @return void - */ - function handleTag_p() { - if (!$this->parser->isStartTag) { - $this->setLineBreaks(2); - } - } - /** - * handle tags - * - * @param void - * @return void - */ - function handleTag_a() { - if ($this->parser->isStartTag) { - $this->buffer(); - if (isset($this->parser->tagAttributes['title'])) { - $this->parser->tagAttributes['title'] = $this->decode($this->parser->tagAttributes['title']); - } else { - $this->parser->tagAttributes['title'] = null; - } - $this->parser->tagAttributes['href'] = $this->decode(trim($this->parser->tagAttributes['href'])); - $this->stack(); - } else { - $tag = $this->unstack(); - $buffer = $this->unbuffer(); - - if (empty($tag['href']) && empty($tag['title'])) { - # empty links... testcase mania, who would possibly do anything like that?! - $this->out('['.$buffer.']()', true); - return; - } - - if ($buffer == $tag['href'] && empty($tag['title'])) { - # - $this->out('<'.$buffer.'>', true); - return; - } - - $bufferDecoded = $this->decode(trim($buffer)); - if (substr($tag['href'], 0, 7) == 'mailto:' && 'mailto:'.$bufferDecoded == $tag['href']) { - if (is_null($tag['title'])) { - # - $this->out('<'.$bufferDecoded.'>', true); - return; - } - # [mail@example.com][1] - # ... - # [1]: mailto:mail@example.com Title - $tag['href'] = 'mailto:'.$bufferDecoded; - } - - $this->out('['.$buffer.']('.$tag['href'].' 
"'.$tag['title'].'")', true); - -/* - # [This link][id] - foreach ($this->stack['a'] as $tag2) { - if ($tag2['href'] == $tag['href'] && $tag2['title'] === $tag['title']) { - $tag['linkID'] = $tag2['linkID']; - break; - } - } - if (!isset($tag['linkID'])) { - $tag['linkID'] = count($this->stack['a']) + 1; - array_push($this->stack['a'], $tag); - } - - $this->out('['.$buffer.']['.$tag['linkID'].']', true); -*/ - } - } - /** - * handle tags - * - * @param void - * @return void - */ - function handleTag_img() { - if (!$this->parser->isStartTag) { - return; # just to be sure this is really an empty tag... - } - - if (isset($this->parser->tagAttributes['title'])) { - $this->parser->tagAttributes['title'] = $this->decode($this->parser->tagAttributes['title']); - } else { - $this->parser->tagAttributes['title'] = null; - } - if (isset($this->parser->tagAttributes['alt'])) { - $this->parser->tagAttributes['alt'] = $this->decode($this->parser->tagAttributes['alt']); - } else { - $this->parser->tagAttributes['alt'] = null; - } - - if (empty($this->parser->tagAttributes['src'])) { - # support for "empty" images... dunno if this is really needed - # but there are some testcases which do that... - if (!empty($this->parser->tagAttributes['title'])) { - $this->parser->tagAttributes['title'] = ' '.$this->parser->tagAttributes['title'].' '; - } - $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['title'].')', true); - return; - } else { - $this->parser->tagAttributes['src'] = $this->decode($this->parser->tagAttributes['src']); - } - -// ![Alt text](/path/to/img.jpg "Optional title") - if ($this->parser->tagAttributes['title'] != "") - $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].' 
"'.$this->parser->tagAttributes['title'].'")', true); - else - $this->out('!['.$this->parser->tagAttributes['alt'].']('.$this->parser->tagAttributes['src'].')', true); - -/* - # [This link][id] - $link_id = false; - if (!empty($this->stack['a'])) { - foreach ($this->stack['a'] as $tag) { - if ($tag['href'] == $this->parser->tagAttributes['src'] - && $tag['title'] === $this->parser->tagAttributes['title']) { - $link_id = $tag['linkID']; - break; - } - } - } else { - $this->stack['a'] = array(); - } - if (!$link_id) { - $link_id = count($this->stack['a']) + 1; - $tag = array( - 'href' => $this->parser->tagAttributes['src'], - 'linkID' => $link_id, - 'title' => $this->parser->tagAttributes['title'] - ); - array_push($this->stack['a'], $tag); - } - - $this->out('!['.$this->parser->tagAttributes['alt'].']['.$link_id.']', true); -*/ - } - /** - * handle tags - * - * @param void - * @return void - */ - function handleTag_code() { - if ($this->hasParent('pre')) { - # ignore code blocks inside

-      return;
-    }
-    if ($this->parser->isStartTag) {
-      $this->buffer();
-    } else {
-      $buffer = $this->unbuffer();
-      # use as many backticks as needed
-      preg_match_all('#`+#', $buffer, $matches);
-      if (!empty($matches[0])) {
-        rsort($matches[0]);
-
-        $ticks = '`';
-        while (true) {
-          if (!in_array($ticks, $matches[0])) {
-            break;
-          }
-          $ticks .= '`';
-        }
-      } else {
-        $ticks = '`';
-      }
-      if ($buffer[0] == '`' || substr($buffer, -1) == '`') {
-        $buffer = ' '.$buffer.' ';
-      }
-      $this->out($ticks.$buffer.$ticks, true);
-    }
-  }
-  /**
-   * handle <pre> tags
-   *
-   * When HTML is kept and an opening <pre> is not directly followed by a
-   * plain <code> tag, the element is not treated as a markdown code block
-   * and is handed to handleTagToText() instead. Otherwise indent() is called
-   * with four spaces for both the opening and the closing tag
-   * (NOTE(review): presumably indent() adds the prefix on the first call and
-   * removes it on the second - confirm against indent()). The closing tag
-   * requests two line breaks; the opening tag strips leading whitespace from
-   * the remaining input.
-   *
-   * @param void
-   * @return void
-   */
-  function handleTag_pre() {
-    if ($this->keepHTML && $this->parser->isStartTag) {
-      # check if a simple <code> follows
-      if (!preg_match('#^\s*<code\s*>#Us', $this->parser->html)) {
-        # this is no standard markdown code block
-        $this->handleTagToText();
-        return;
-      }
-    }
-    $this->indent('    ');
-    if (!$this->parser->isStartTag) {
-      $this->setLineBreaks(2);
-    } else {
-      $this->parser->html = ltrim($this->parser->html);
-    }
-  }
-  /**
-   * handle 
tags - * - * @param void - * @return void - */ - function handleTag_blockquote() { - $this->indent('> '); - } - /** - * handle