diff options
author | friendica <info@friendica.com> | 2015-01-01 22:18:27 -0800 |
---|---|---|
committer | friendica <info@friendica.com> | 2015-01-01 22:18:27 -0800 |
commit | a0052f0176bd079e6a94baec59fea2ec5a8d651e (patch) | |
tree | c323edd823681bc2e8ca757e7eaf8354d42c7b51 /library/HTMLPurifier/Generator.php | |
parent | 545e47933a0816699c68d98a7742a03260d6a54f (diff) | |
download | volse-hubzilla-a0052f0176bd079e6a94baec59fea2ec5a8d651e.tar.gz volse-hubzilla-a0052f0176bd079e6a94baec59fea2ec5a8d651e.tar.bz2 volse-hubzilla-a0052f0176bd079e6a94baec59fea2ec5a8d651e.zip |
htmlpurifier update - compatibility issue with language library autoloader
Diffstat (limited to 'library/HTMLPurifier/Generator.php')
-rw-r--r-- | library/HTMLPurifier/Generator.php | 164 |
1 files changed, 113 insertions, 51 deletions
diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index 4a6241727..6fb568714 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -11,49 +11,64 @@ class HTMLPurifier_Generator { /** - * Whether or not generator should produce XML output + * Whether or not generator should produce XML output. + * @type bool */ private $_xhtml = true; /** - * :HACK: Whether or not generator should comment the insides of <script> tags + * :HACK: Whether or not generator should comment the insides of <script> tags. + * @type bool */ private $_scriptFix = false; /** * Cache of HTMLDefinition during HTML output to determine whether or * not attributes should be minimized. + * @type HTMLPurifier_HTMLDefinition */ private $_def; /** - * Cache of %Output.SortAttr + * Cache of %Output.SortAttr. + * @type bool */ private $_sortAttr; /** - * Cache of %Output.FlashCompat + * Cache of %Output.FlashCompat. + * @type bool */ private $_flashCompat; /** + * Cache of %Output.FixInnerHTML. + * @type bool + */ + private $_innerHTMLFix; + + /** * Stack for keeping track of object information when outputting IE * compatibility code. 
+ * @type array */ private $_flashStack = array(); /** * Configuration for the generator + * @type HTMLPurifier_Config */ protected $config; /** - * @param $config Instance of HTMLPurifier_Config - * @param $context Instance of HTMLPurifier_Context + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context */ - public function __construct($config, $context) { + public function __construct($config, $context) + { $this->config = $config; $this->_scriptFix = $config->get('Output.CommentScriptContents'); + $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); $this->_sortAttr = $config->get('Output.SortAttr'); $this->_flashCompat = $config->get('Output.FlashCompat'); $this->_def = $config->getHTMLDefinition(); @@ -62,12 +77,14 @@ class HTMLPurifier_Generator /** * Generates HTML from an array of tokens. - * @param $tokens Array of HTMLPurifier_Token - * @param $config HTMLPurifier_Config object - * @return Generated HTML + * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token + * @return string Generated HTML */ - public function generateFromTokens($tokens) { - if (!$tokens) return ''; + public function generateFromTokens($tokens) + { + if (!$tokens) { + return ''; + } // Basic algorithm $html = ''; @@ -86,30 +103,41 @@ class HTMLPurifier_Generator // Tidy cleanup if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { $tidy = new Tidy; - $tidy->parseString($html, array( - 'indent'=> true, - 'output-xhtml' => $this->_xhtml, - 'show-body-only' => true, - 'indent-spaces' => 2, - 'wrap' => 68, - ), 'utf8'); + $tidy->parseString( + $html, + array( + 'indent'=> true, + 'output-xhtml' => $this->_xhtml, + 'show-body-only' => true, + 'indent-spaces' => 2, + 'wrap' => 68, + ), + 'utf8' + ); $tidy->cleanRepair(); $html = (string) $tidy; // explicit cast necessary } // Normalize newlines to system defined value - $nl = $this->config->get('Output.Newline'); - if ($nl === null) $nl = PHP_EOL; - if ($nl !== "\n") $html = 
str_replace("\n", $nl, $html); + if ($this->config->get('Core.NormalizeNewlines')) { + $nl = $this->config->get('Output.Newline'); + if ($nl === null) { + $nl = PHP_EOL; + } + if ($nl !== "\n") { + $html = str_replace("\n", $nl, $html); + } + } return $html; } /** * Generates HTML from a single token. - * @param $token HTMLPurifier_Token object. - * @return Generated HTML + * @param HTMLPurifier_Token $token HTMLPurifier_Token object. + * @return string Generated HTML */ - public function generateFromToken($token) { + public function generateFromToken($token) + { if (!$token instanceof HTMLPurifier_Token) { trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); return ''; @@ -130,19 +158,7 @@ class HTMLPurifier_Generator $_extra = ''; if ($this->_flashCompat) { if ($token->name == "object" && !empty($this->_flashStack)) { - $flash = array_pop($this->_flashStack); - $compat_token = new HTMLPurifier_Token_Empty("embed"); - foreach ($flash->attr as $name => $val) { - if ($name == "classid") continue; - if ($name == "type") continue; - if ($name == "data") $name = "src"; - $compat_token->attr[$name] = $val; - } - foreach ($flash->param as $name => $val) { - if ($name == "movie") $name = "src"; - $compat_token->attr[$name] = $val; - } - $_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->"; + // doesn't do anything for now } } return $_extra . '</' . $token->name . '>'; @@ -169,11 +185,16 @@ class HTMLPurifier_Generator /** * Special case processor for the contents of script tags + * @param HTMLPurifier_Token $token HTMLPurifier_Token object. + * @return string * @warning This runs into problems if there's already a literal * --> somewhere inside the script contents. 
*/ - public function generateScriptFromToken($token) { - if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); + public function generateScriptFromToken($token) + { + if (!$token instanceof HTMLPurifier_Token_Text) { + return $this->generateFromToken($token); + } // Thanks <http://lachy.id.au/log/2005/05/script-comments> $data = preg_replace('#//\s*$#', '', $token->data); return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; @@ -182,24 +203,60 @@ class HTMLPurifier_Generator /** * Generates attribute declarations from attribute array. * @note This does not include the leading or trailing space. - * @param $assoc_array_of_attributes Attribute array - * @param $element Name of element attributes are for, used to check + * @param array $assoc_array_of_attributes Attribute array + * @param string $element Name of element attributes are for, used to check * attribute minimization. - * @return Generate HTML fragment for insertion. + * @return string Generated HTML fragment for insertion. */ - public function generateAttributes($assoc_array_of_attributes, $element = false) { + public function generateAttributes($assoc_array_of_attributes, $element = '') + { $html = ''; - if ($this->_sortAttr) ksort($assoc_array_of_attributes); + if ($this->_sortAttr) { + ksort($assoc_array_of_attributes); + } foreach ($assoc_array_of_attributes as $key => $value) { if (!$this->_xhtml) { // Remove namespaced attributes - if (strpos($key, ':') !== false) continue; + if (strpos($key, ':') !== false) { + continue; + } // Check if we should minimize the attribute: val="val" -> val if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { $html .= $key . ' '; continue; } } + // Workaround for Internet Explorer innerHTML bug. + // Essentially, Internet Explorer, when calculating + // innerHTML, omits quotes if there are no instances of + // angled brackets, quotes or spaces. 
However, when parsing + // HTML (for example, when you assign to innerHTML), it + // treats backticks as quotes. Thus, + // <img alt="``" /> + // becomes + // <img alt=`` /> + // becomes + // <img alt='' /> + // Fortunately, all we need to do is trigger an appropriate + // quoting style, which we do by adding an extra space. + // This also is consistent with the W3C spec, which states + // that user agents may ignore leading or trailing + // whitespace (in fact, most don't, at least for attributes + // like alt, but an extra space at the end is barely + // noticeable). Still, we have a configuration knob for + // this, since this transformation is not necessary if you + // don't process user input with innerHTML or you don't plan + // on supporting Internet Explorer. + if ($this->_innerHTMLFix) { + if (strpos($value, '`') !== false) { + // check if correct quoting style would not already be + // triggered + if (strcspn($value, '"\' <>') === strlen($value)) { + // protect! + $value .= ' '; + } + } + } $html .= $key.'="'.$this->escape($value).'" '; } return rtrim($html); @@ -210,15 +267,20 @@ class HTMLPurifier_Generator * @todo This really ought to be protected, but until we have a facility * for properly generating HTML here w/o using tokens, it stays * public. - * @param $string String data to escape for HTML. - * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is + * @param string $string String data to escape for HTML. + * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is * permissible for non-attribute output. - * @return String escaped data. + * @return string escaped data. 
*/ - public function escape($string, $quote = ENT_COMPAT) { + public function escape($string, $quote = null) + { + // Workaround for APC bug on Mac Leopard reported by sidepodcast + // http://htmlpurifier.org/phorum/read.php?3,4823,4846 + if ($quote === null) { + $quote = ENT_COMPAT; + } return htmlspecialchars($string, $quote, 'UTF-8'); } - } // vim: et sw=4 sts=4 |