From 547df2219ab4b870256f2ed90e36b97d8bf200bf Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Tue, 23 May 2017 00:32:11 +0200 Subject: Replace Mardownify library with html-to-markdown library. --- vendor/pixel418/markdownify/src/Parser.php | 564 ----------------------------- 1 file changed, 564 deletions(-) delete mode 100644 vendor/pixel418/markdownify/src/Parser.php (limited to 'vendor/pixel418/markdownify/src/Parser.php') diff --git a/vendor/pixel418/markdownify/src/Parser.php b/vendor/pixel418/markdownify/src/Parser.php deleted file mode 100644 index 90fcdf9f8..000000000 --- a/vendor/pixel418/markdownify/src/Parser.php +++ /dev/null @@ -1,564 +0,0 @@ - etc.) - * - * @var array - */ - public $emptyTags = array( - 'br', - 'hr', - 'input', - 'img', - 'area', - 'link', - 'meta', - 'param', - ); - - /** - * tags with preformatted text - * whitespaces wont be touched in them - * - * @var array - */ - public $preformattedTags = array( - 'script', - 'style', - 'pre', - 'code', - ); - - /** - * supress HTML tags inside preformatted tags (see above) - * - * @var bool - */ - public $noTagsInCode = false; - - /** - * html to be parsed - * - * @var string - */ - public $html = ''; - - /** - * node type: - * - * - tag (see isStartTag) - * - text (includes cdata) - * - comment - * - doctype - * - pi (processing instruction) - * - * @var string - */ - public $nodeType = ''; - - /** - * current node content, i.e. either a - * simple string (text node), or something like - * - * - * @var string - */ - public $node = ''; - - /** - * wether current node is an opening tag () or not () - * set to NULL if current node is not a tag - * NOTE: empty tags (
) set this to true as well! - * - * @var bool | null - */ - public $isStartTag = null; - - /** - * wether current node is an empty tag (
) or not () - * - * @var bool | null - */ - public $isEmptyTag = null; - - /** - * tag name - * - * @var string | null - */ - public $tagName = ''; - - /** - * attributes of current tag - * - * @var array (attribName=>value) | null - */ - public $tagAttributes = null; - - /** - * whether or not the actual context is a inline context - * - * @var bool | null - */ - public $isInlineContext = null; - - /** - * whether the current tag is a block element - * - * @var bool | null - */ - public $isBlockElement = null; - - /** - * whether the previous tag (browser) is a block element - * - * @var bool | null - */ - public $isNextToInlineContext = null; - - /** - * keep whitespace - * - * @var int - */ - public $keepWhitespace = 0; - - /** - * list of open tags - * count this to get current depth - * - * @var array - */ - public $openTags = array(); - - /** - * list of block elements - * - * @var array - * TODO: what shall we do with and ?! - */ - public $blockElements = array( - // tag name => is block - // block elements - 'address' => true, - 'blockquote' => true, - 'center' => true, - 'del' => true, - 'dir' => true, - 'div' => true, - 'dl' => true, - 'fieldset' => true, - 'form' => true, - 'h1' => true, - 'h2' => true, - 'h3' => true, - 'h4' => true, - 'h5' => true, - 'h6' => true, - 'hr' => true, - 'ins' => true, - 'isindex' => true, - 'menu' => true, - 'noframes' => true, - 'noscript' => true, - 'ol' => true, - 'p' => true, - 'pre' => true, - 'table' => true, - 'ul' => true, - // set table elements and list items to block as well - 'thead' => true, - 'tbody' => true, - 'tfoot' => true, - 'td' => true, - 'tr' => true, - 'th' => true, - 'li' => true, - 'dd' => true, - 'dt' => true, - // header items and html / body as well - 'html' => true, - 'body' => true, - 'head' => true, - 'meta' => true, - 'link' => true, - 'style' => true, - 'title' => true, - // unfancy media tags, when indented should be rendered as block - 'map' => true, - 'object' => true, - 'param' => true, 
- 'embed' => true, - 'area' => true, - // inline elements - 'a' => false, - 'abbr' => false, - 'acronym' => false, - 'applet' => false, - 'b' => false, - 'basefont' => false, - 'bdo' => false, - 'big' => false, - 'br' => false, - 'button' => false, - 'cite' => false, - 'code' => false, - 'del' => false, - 'dfn' => false, - 'em' => false, - 'font' => false, - 'i' => false, - 'img' => false, - 'ins' => false, - 'input' => false, - 'iframe' => false, - 'kbd' => false, - 'label' => false, - 'q' => false, - 'samp' => false, - 'script' => false, - 'select' => false, - 'small' => false, - 'span' => false, - 'strong' => false, - 'sub' => false, - 'sup' => false, - 'textarea' => false, - 'tt' => false, - 'var' => false, - ); - - /** - * get next node, set $this->html prior! - * - * @param void - * @return bool - */ - public function nextNode() - { - if (empty($this->html)) { - // we are done with parsing the html string - - return false; - } - - if ($this->isStartTag && !$this->isEmptyTag) { - array_push($this->openTags, $this->tagName); - if (in_array($this->tagName, $this->preformattedTags)) { - // dont truncate whitespaces for or
 contents
-                $this->keepWhitespace++;
-            }
-        }
-
-        if ($this->html[0] == '<') {
-            $token = substr($this->html, 0, 9);
-            if (substr($token, 0, 2) == '<?') {
-                $pos = strpos($this->html, '>');
-                $this->setNode('pi', $pos + 1);
-
-                return true;
-            }
-            if (substr($token, 0, 4) == '<!--') {
-                $pos = strpos($this->html, '-->');
-                if ($pos === false) {
-                    // could not find a closing -->, use next gt instead
-                    // this is firefox' behaviour
-                    $pos = strpos($this->html, '>') + 1;
-                } else {
-                    $pos += 3;
-                }
-                $this->setNode('comment', $pos);
-
-                static::$skipWhitespace = true;
-
-                return true;
-            }
-            if ($token == '<!DOCTYPE') {
-                $this->setNode('doctype', strpos($this->html, '>') + 1);
-
-                static::$skipWhitespace = true;
-
-                return true;
-            }
-            if ($token == '<![CDATA[') {
-                $this->html = substr($this->html, 9);
-
-                $this->setNode('text', strpos($this->html, ']]>') + 3);
-
-                // remove trailing ]]> and trim
-                $this->node = substr($this->node, 0, -3);
-                $this->handleWhitespaces();
-
-                static::$skipWhitespace = true;
-
-                return true;
-            }
-            if ($this->parseTag()) {
-                // seems to be a tag
-                // handle whitespaces
-                if ($this->isBlockElement) {
-                    static::$skipWhitespace = true;
-                } else {
-                    static::$skipWhitespace = false;
-                }
-
-                return true;
-            }
-        }
-        if ($this->keepWhitespace) {
-            static::$skipWhitespace = false;
-        }
-        // when we get here it seems to be a text node
-        $pos = strpos($this->html, '<');
-        if ($pos === false) {
-            $pos = strlen($this->html);
-        }
-        $this->setNode('text', $pos);
-        $this->handleWhitespaces();
-        if (static::$skipWhitespace && $this->node == ' ') {
-            return $this->nextNode();
-        }
-        $this->isInlineContext = true;
-        static::$skipWhitespace = false;
-
-        return true;
-    }
-
-    /**
-     * parse tag, set tag name and attributes, see if it's a closing tag and so forth...
-     *
-     * @param void
-     * @return bool
-     */
-    protected function parseTag()
-    {
-        if (!isset(static::$a_ord)) {
-            static::$a_ord = ord('a');
-            static::$z_ord = ord('z');
-            static::$special_ords = array(
-                ord(':'), // for xml:lang
-                ord('-'), // for http-equiv
-            );
-        }
-
-        $tagName = '';
-
-        $pos = 1;
-        $isStartTag = $this->html[$pos] != '/';
-        if (!$isStartTag) {
-            $pos++;
-        }
-        // get tagName
-        while (isset($this->html[$pos])) {
-            $pos_ord = ord(strtolower($this->html[$pos]));
-            if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || (!empty($tagName) && is_numeric($this->html[$pos]))) {
-                $tagName .= $this->html[$pos];
-                $pos++;
-            } else {
-                $pos--;
-                break;
-            }
-        }
-
-        $tagName = strtolower($tagName);
-        if (empty($tagName) || !isset($this->blockElements[$tagName])) {
-            // something went wrong => invalid tag
-            $this->invalidTag();
-
-            return false;
-        }
-        if ($this->noTagsInCode && end($this->openTags) == 'code' && !($tagName == 'code' && !$isStartTag)) {
-            // we supress all HTML tags inside code tags
-            $this->invalidTag();
-
-            return false;
-        }
-
-        // get tag attributes
-        /** TODO: in html 4 attributes do not need to be quoted **/
-        $isEmptyTag = false;
-        $attributes = array();
-        $currAttrib = '';
-        while (isset($this->html[$pos + 1])) {
-            $pos++;
-            // close tag
-            if ($this->html[$pos] == '>' || $this->html[$pos] . $this->html[$pos + 1] == '/>') {
-                if ($this->html[$pos] == '/') {
-                    $isEmptyTag = true;
-                    $pos++;
-                }
-                break;
-            }
-
-            $pos_ord = ord(strtolower($this->html[$pos]));
-            if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || in_array($pos_ord, static::$special_ords)) {
-                // attribute name
-                $currAttrib .= $this->html[$pos];
-            } elseif (in_array($this->html[$pos], array(' ', "\t", "\n"))) {
-                // drop whitespace
-            } elseif (in_array($this->html[$pos] . $this->html[$pos + 1], array('="', "='"))) {
-                // get attribute value
-                $pos++;
-                $await = $this->html[$pos]; // single or double quote
-                $pos++;
-                $value = '';
-                while (isset($this->html[$pos]) && $this->html[$pos] != $await) {
-                    $value .= $this->html[$pos];
-                    $pos++;
-                }
-                $attributes[$currAttrib] = $value;
-                $currAttrib = '';
-            } else {
-                $this->invalidTag();
-
-                return false;
-            }
-        }
-        if ($this->html[$pos] != '>') {
-            $this->invalidTag();
-
-            return false;
-        }
-
-        if (!empty($currAttrib)) {
-            // html 4 allows something like