aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/AttrDef/HTML
diff options
context:
space:
mode:
authorMike Macgirvin <mike@macgirvin.com>2010-09-08 20:14:17 -0700
committerMike Macgirvin <mike@macgirvin.com>2010-09-08 20:14:17 -0700
commitffb1997902facb36b78a7cfa522f41f2b3d71cda (patch)
treee9fe47acf26c5fd2c742677f2610b60d3008eb26 /library/HTMLPurifier/AttrDef/HTML
parentb49858b038a0a05bbe7685929e88071d0e125538 (diff)
downloadvolse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.tar.gz
volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.tar.bz2
volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.zip
mistpark 2.0 infrasturcture lands
Diffstat (limited to 'library/HTMLPurifier/AttrDef/HTML')
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Bool.php28
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Class.php34
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Color.php32
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/FrameTarget.php21
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/ID.php70
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Length.php41
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/LinkTypes.php53
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/MultiLength.php41
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Nmtokens.php52
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Pixels.php48
10 files changed, 420 insertions, 0 deletions
diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php
new file mode 100644
index 000000000..e06987eb8
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Bool.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Validates a boolean attribute
+ */
+class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
+{
+
+ protected $name;
+ public $minimized = true;
+
+ public function __construct($name = false) {$this->name = $name;}
+
+ public function validate($string, $config, $context) {
+ if (empty($string)) return false;
+ return $this->name;
+ }
+
+ /**
+ * @param $string Name of attribute
+ */
+ public function make($string) {
+ return new HTMLPurifier_AttrDef_HTML_Bool($string);
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php
new file mode 100644
index 000000000..370068d97
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Class.php
@@ -0,0 +1,34 @@
+<?php
+
+/**
+ * Implements special behavior for class attribute (normally NMTOKENS)
+ */
+class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
+{
+ protected function split($string, $config, $context) {
+ // really, this twiddle should be lazy loaded
+ $name = $config->getDefinition('HTML')->doctype->name;
+ if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
+ return parent::split($string, $config, $context);
+ } else {
+ return preg_split('/\s+/', $string);
+ }
+ }
+ protected function filter($tokens, $config, $context) {
+ $allowed = $config->get('Attr.AllowedClasses');
+ $forbidden = $config->get('Attr.ForbiddenClasses');
+ $ret = array();
+ foreach ($tokens as $token) {
+ if (
+ ($allowed === null || isset($allowed[$token])) &&
+ !isset($forbidden[$token]) &&
+ // We need this O(n) check because of PHP's array
+ // implementation that casts -0 to 0.
+ !in_array($token, $ret, true)
+ ) {
+ $ret[] = $token;
+ }
+ }
+ return $ret;
+ }
+}
diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php
new file mode 100644
index 000000000..d01e20454
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Color.php
@@ -0,0 +1,32 @@
+<?php
+
+/**
+ * Validates a color according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
+{
+
+ public function validate($string, $config, $context) {
+
+ static $colors = null;
+ if ($colors === null) $colors = $config->get('Core.ColorKeywords');
+
+ $string = trim($string);
+
+ if (empty($string)) return false;
+ if (isset($colors[$string])) return $colors[$string];
+ if ($string[0] === '#') $hex = substr($string, 1);
+ else $hex = $string;
+
+ $length = strlen($hex);
+ if ($length !== 3 && $length !== 6) return false;
+ if (!ctype_xdigit($hex)) return false;
+ if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];
+
+ return "#$hex";
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
new file mode 100644
index 000000000..ae6ea7c01
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Special-case enum attribute definition that lazy loads allowed frame targets
+ */
+class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
+{
+
+ public $valid_values = false; // uninitialized value
+ protected $case_sensitive = false;
+
+ public function __construct() {}
+
+ public function validate($string, $config, $context) {
+ if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
+ return parent::validate($string, $config, $context);
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php
new file mode 100644
index 000000000..81d03762d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/ID.php
@@ -0,0 +1,70 @@
+<?php
+
+/**
+ * Validates the HTML attribute ID.
+ * @warning Even though this is the id processor, it
+ * will ignore the directive Attr:IDBlacklist, since it will only
+ * go according to the ID accumulator. Since the accumulator is
+ * automatically generated, it will have already absorbed the
+ * blacklist. If you're hacking around, make sure you use load()!
+ */
+
+class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
+{
+
+ // ref functionality disabled, since we also have to verify
+ // whether or not the ID it refers to exists
+
+ public function validate($id, $config, $context) {
+
+ if (!$config->get('Attr.EnableID')) return false;
+
+ $id = trim($id); // trim it first
+
+ if ($id === '') return false;
+
+ $prefix = $config->get('Attr.IDPrefix');
+ if ($prefix !== '') {
+ $prefix .= $config->get('Attr.IDPrefixLocal');
+ // prevent re-appending the prefix
+ if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
+ } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
+ trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
+ '%Attr.IDPrefix is set', E_USER_WARNING);
+ }
+
+ //if (!$this->ref) {
+ $id_accumulator =& $context->get('IDAccumulator');
+ if (isset($id_accumulator->ids[$id])) return false;
+ //}
+
+ // we purposely avoid using regex, hopefully this is faster
+
+ if (ctype_alpha($id)) {
+ $result = true;
+ } else {
+ if (!ctype_alpha(@$id[0])) return false;
+ $trim = trim( // primitive style of regexps, I suppose
+ $id,
+ 'A..Za..z0..9:-._'
+ );
+ $result = ($trim === '');
+ }
+
+ $regexp = $config->get('Attr.IDBlacklistRegexp');
+ if ($regexp && preg_match($regexp, $id)) {
+ return false;
+ }
+
+ if (/*!$this->ref && */$result) $id_accumulator->add($id);
+
+ // if no change was made to the ID, return the result
+ // else, return the new id if stripping whitespace made it
+ // valid, or return false.
+ return $result ? $id : false;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Length.php b/library/HTMLPurifier/AttrDef/HTML/Length.php
new file mode 100644
index 000000000..a242f9c23
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Length.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * Validates the HTML type length (not to be confused with CSS's length).
+ *
+ * This accepts integer pixels or percentages as lengths for certain
+ * HTML attributes.
+ */
+
+class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
+{
+
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+ if ($string === '') return false;
+
+ $parent_result = parent::validate($string, $config, $context);
+ if ($parent_result !== false) return $parent_result;
+
+ $length = strlen($string);
+ $last_char = $string[$length - 1];
+
+ if ($last_char !== '%') return false;
+
+ $points = substr($string, 0, $length - 1);
+
+ if (!is_numeric($points)) return false;
+
+ $points = (int) $points;
+
+ if ($points < 0) return '0%';
+ if ($points > 100) return '100%';
+
+ return ((string) $points) . '%';
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
new file mode 100644
index 000000000..76d25ed08
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
@@ -0,0 +1,53 @@
+<?php
+
+/**
+ * Validates a rel/rev link attribute against a directive of allowed values
+ * @note We cannot use Enum because link types allow multiple
+ * values.
+ * @note Assumes link types are ASCII text
+ */
+class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
+{
+
+ /** Name config attribute to pull. */
+ protected $name;
+
+ public function __construct($name) {
+ $configLookup = array(
+ 'rel' => 'AllowedRel',
+ 'rev' => 'AllowedRev'
+ );
+ if (!isset($configLookup[$name])) {
+ trigger_error('Unrecognized attribute name for link '.
+ 'relationship.', E_USER_ERROR);
+ return;
+ }
+ $this->name = $configLookup[$name];
+ }
+
+ public function validate($string, $config, $context) {
+
+ $allowed = $config->get('Attr.' . $this->name);
+ if (empty($allowed)) return false;
+
+ $string = $this->parseCDATA($string);
+ $parts = explode(' ', $string);
+
+ // lookup to prevent duplicates
+ $ret_lookup = array();
+ foreach ($parts as $part) {
+ $part = strtolower(trim($part));
+ if (!isset($allowed[$part])) continue;
+ $ret_lookup[$part] = true;
+ }
+
+ if (empty($ret_lookup)) return false;
+ $string = implode(' ', array_keys($ret_lookup));
+
+ return $string;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
new file mode 100644
index 000000000..c72fc76e4
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * Validates a MultiLength as defined by the HTML spec.
+ *
+ * A multilength is either a integer (pixel count), a percentage, or
+ * a relative number.
+ */
+class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
+{
+
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+ if ($string === '') return false;
+
+ $parent_result = parent::validate($string, $config, $context);
+ if ($parent_result !== false) return $parent_result;
+
+ $length = strlen($string);
+ $last_char = $string[$length - 1];
+
+ if ($last_char !== '*') return false;
+
+ $int = substr($string, 0, $length - 1);
+
+ if ($int == '') return '*';
+ if (!is_numeric($int)) return false;
+
+ $int = (int) $int;
+
+ if ($int < 0) return false;
+ if ($int == 0) return '0';
+ if ($int == 1) return '*';
+ return ((string) $int) . '*';
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100644
index 000000000..aa34120bd
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+
+ // early abort: '' and '0' (strings that convert to false) are invalid
+ if (!$string) return false;
+
+ $tokens = $this->split($string, $config, $context);
+ $tokens = $this->filter($tokens, $config, $context);
+ if (empty($tokens)) return false;
+ return implode(' ', $tokens);
+
+ }
+
+ /**
+ * Splits a space separated list of tokens into its constituent parts.
+ */
+ protected function split($string, $config, $context) {
+ // OPTIMIZABLE!
+ // do the preg_match, capture all subpatterns for reformulation
+
+ // we don't support U+00A1 and up codepoints or
+ // escaping because I don't know how to do that with regexps
+ // and plus it would complicate optimization efforts (you never
+ // see that anyway).
+ $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
+ '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
+ '(?:(?=\s)|\z)/'; // look ahead for space or string end
+ preg_match_all($pattern, $string, $matches);
+ return $matches[1];
+ }
+
+ /**
+ * Template method for removing certain tokens based on arbitrary criteria.
+ * @note If we wanted to be really functional, we'd do an array_filter
+ * with a callback. But... we're not.
+ */
+ protected function filter($tokens, $config, $context) {
+ return $tokens;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
new file mode 100644
index 000000000..4cb2c1b85
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Validates an integer representation of pixels according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
+{
+
+ protected $max;
+
+ public function __construct($max = null) {
+ $this->max = $max;
+ }
+
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+ if ($string === '0') return $string;
+ if ($string === '') return false;
+ $length = strlen($string);
+ if (substr($string, $length - 2) == 'px') {
+ $string = substr($string, 0, $length - 2);
+ }
+ if (!is_numeric($string)) return false;
+ $int = (int) $string;
+
+ if ($int < 0) return '0';
+
+ // upper-bound value, extremely high values can
+ // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
+ // WARNING, above link WILL crash you if you're using Windows
+
+ if ($this->max !== null && $int > $this->max) return (string) $this->max;
+
+ return (string) $int;
+
+ }
+
+ public function make($string) {
+ if ($string === '') $max = null;
+ else $max = (int) $string;
+ $class = get_class($this);
+ return new $class($max);
+ }
+
+}
+
+// vim: et sw=4 sts=4