diff options
author | Mike Macgirvin <mike@macgirvin.com> | 2010-09-08 20:14:17 -0700 |
---|---|---|
committer | Mike Macgirvin <mike@macgirvin.com> | 2010-09-08 20:14:17 -0700 |
commit | ffb1997902facb36b78a7cfa522f41f2b3d71cda (patch) | |
tree | e9fe47acf26c5fd2c742677f2610b60d3008eb26 /library/HTMLPurifier/HTMLModule | |
parent | b49858b038a0a05bbe7685929e88071d0e125538 (diff) | |
download | volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.tar.gz volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.tar.bz2 volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.zip |
mistpark 2.0 infrasturcture lands
Diffstat (limited to 'library/HTMLPurifier/HTMLModule')
29 files changed, 1444 insertions, 0 deletions
diff --git a/library/HTMLPurifier/HTMLModule/Bdo.php b/library/HTMLPurifier/HTMLModule/Bdo.php new file mode 100644 index 000000000..3d66f1b4e --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Bdo.php @@ -0,0 +1,31 @@ +<?php + +/** + * XHTML 1.1 Bi-directional Text Module, defines elements that + * declare directionality of content. Text Extension Module. + */ +class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule +{ + + public $name = 'Bdo'; + public $attr_collections = array( + 'I18N' => array('dir' => false) + ); + + public function setup($config) { + $bdo = $this->addElement( + 'bdo', 'Inline', 'Inline', array('Core', 'Lang'), + array( + 'dir' => 'Enum#ltr,rtl', // required + // The Abstract Module specification has the attribute + // inclusions wrong for bdo: bdo allows Lang + ) + ); + $bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir(); + + $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl'; + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/CommonAttributes.php b/library/HTMLPurifier/HTMLModule/CommonAttributes.php new file mode 100644 index 000000000..7c15da84f --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -0,0 +1,26 @@ +<?php + +class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule +{ + public $name = 'CommonAttributes'; + + public $attr_collections = array( + 'Core' => array( + 0 => array('Style'), + // 'xml:space' => false, + 'class' => 'Class', + 'id' => 'ID', + 'title' => 'CDATA', + ), + 'Lang' => array(), + 'I18N' => array( + 0 => array('Lang'), // proprietary, for xml:lang/lang + ), + 'Common' => array( + 0 => array('Core', 'I18N') + ) + ); + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Edit.php b/library/HTMLPurifier/HTMLModule/Edit.php new file mode 100644 index 000000000..ff9369055 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Edit.php @@ -0,0 +1,38 @@ +<?php + +/** + * XHTML 1.1 Edit Module, defines editing-related elements. Text Extension + * Module. + */ +class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule +{ + + public $name = 'Edit'; + + public function setup($config) { + $contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow'; + $attr = array( + 'cite' => 'URI', + // 'datetime' => 'Datetime', // not implemented + ); + $this->addElement('del', 'Inline', $contents, 'Common', $attr); + $this->addElement('ins', 'Inline', $contents, 'Common', $attr); + } + + // HTML 4.01 specifies that ins/del must not contain block + // elements when used in an inline context, chameleon is + // a complicated workaround to acheive this effect + + // Inline context ! Block context (exclamation mark is + // separator, see getChildDef for parsing) + + public $defines_child_def = true; + public function getChildDef($def) { + if ($def->content_model_type != 'chameleon') return false; + $value = explode('!', $def->content_model); + return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Forms.php b/library/HTMLPurifier/HTMLModule/Forms.php new file mode 100644 index 000000000..44c22f6f8 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Forms.php @@ -0,0 +1,118 @@ +<?php + +/** + * XHTML 1.1 Forms module, defines all form-related elements found in HTML 4. + */ +class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule +{ + public $name = 'Forms'; + public $safe = false; + + public $content_sets = array( + 'Block' => 'Form', + 'Inline' => 'Formctrl', + ); + + public function setup($config) { + $form = $this->addElement('form', 'Form', + 'Required: Heading | List | Block | fieldset', 'Common', array( + 'accept' => 'ContentTypes', + 'accept-charset' => 'Charsets', + 'action*' => 'URI', + 'method' => 'Enum#get,post', + // really ContentType, but these two are the only ones used today + 'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data', + )); + $form->excludes = array('form' => true); + + $input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array( + 'accept' => 'ContentTypes', + 'accesskey' => 'Character', + 'alt' => 'Text', + 'checked' => 'Bool#checked', + 'disabled' => 'Bool#disabled', + 'maxlength' => 'Number', + 'name' => 'CDATA', + 'readonly' => 'Bool#readonly', + 'size' => 'Number', + 'src' => 'URI#embeds', + 'tabindex' => 'Number', + 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', + 'value' => 'CDATA', + )); + $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input(); + + $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array( + 'disabled' => 'Bool#disabled', + 'multiple' => 'Bool#multiple', + 'name' => 'CDATA', + 'size' => 'Number', + 'tabindex' => 'Number', + )); + + $this->addElement('option', false, 'Optional: #PCDATA', 'Common', array( + 'disabled' => 'Bool#disabled', + 'label' => 'Text', + 'selected' => 'Bool#selected', + 'value' => 'CDATA', + )); + // It's illegal for there to be more than one selected, but not + // be multiple. Also, no selected means undefined behavior. This might + // be difficult to implement; perhaps an injector, or a context variable. + + $textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array( + 'accesskey' => 'Character', + 'cols*' => 'Number', + 'disabled' => 'Bool#disabled', + 'name' => 'CDATA', + 'readonly' => 'Bool#readonly', + 'rows*' => 'Number', + 'tabindex' => 'Number', + )); + $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea(); + + $button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array( + 'accesskey' => 'Character', + 'disabled' => 'Bool#disabled', + 'name' => 'CDATA', + 'tabindex' => 'Number', + 'type' => 'Enum#button,submit,reset', + 'value' => 'CDATA', + )); + + // For exclusions, ideally we'd specify content sets, not literal elements + $button->excludes = $this->makeLookup( + 'form', 'fieldset', // Form + 'input', 'select', 'textarea', 'label', 'button', // Formctrl + 'a' // as per HTML 4.01 spec, this is omitted by modularization + ); + + // Extra exclusion: img usemap="" is not permitted within this element. + // We'll omit this for now, since we don't have any good way of + // indicating it yet. + + // This is HIGHLY user-unfriendly; we need a custom child-def for this + $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common'); + + $label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array( + 'accesskey' => 'Character', + // 'for' => 'IDREF', // IDREF not implemented, cannot allow + )); + $label->excludes = array('label' => true); + + $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array( + 'accesskey' => 'Character', + )); + + $this->addElement('optgroup', false, 'Required: option', 'Common', array( + 'disabled' => 'Bool#disabled', + 'label*' => 'Text', + )); + + // Don't forget an injector for <isindex>. This one's a little complex + // because it maps to multiple elements. + + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Hypertext.php b/library/HTMLPurifier/HTMLModule/Hypertext.php new file mode 100644 index 000000000..d7e9bdd27 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Hypertext.php @@ -0,0 +1,31 @@ +<?php + +/** + * XHTML 1.1 Hypertext Module, defines hypertext links. Core Module. + */ +class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule +{ + + public $name = 'Hypertext'; + + public function setup($config) { + $a = $this->addElement( + 'a', 'Inline', 'Inline', 'Common', + array( + // 'accesskey' => 'Character', + // 'charset' => 'Charset', + 'href' => 'URI', + // 'hreflang' => 'LanguageCode', + 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), + 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), + // 'tabindex' => 'Number', + // 'type' => 'ContentType', + ) + ); + $a->formatting = true; + $a->excludes = array('a' => true); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Image.php b/library/HTMLPurifier/HTMLModule/Image.php new file mode 100644 index 000000000..948d435bc --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Image.php @@ -0,0 +1,40 @@ +<?php + +/** + * XHTML 1.1 Image Module provides basic image embedding. + * @note There is specialized code for removing empty images in + * HTMLPurifier_Strategy_RemoveForeignElements + */ +class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule +{ + + public $name = 'Image'; + + public function setup($config) { + $max = $config->get('HTML.MaxImgLength'); + $img = $this->addElement( + 'img', 'Inline', 'Empty', 'Common', + array( + 'alt*' => 'Text', + // According to the spec, it's Length, but percents can + // be abused, so we allow only Pixels. + 'height' => 'Pixels#' . $max, + 'width' => 'Pixels#' . $max, + 'longdesc' => 'URI', + 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded + ) + ); + if ($max === null || $config->get('HTML.Trusted')) { + $img->attr['height'] = + $img->attr['width'] = 'Length'; + } + + // kind of strange, but splitting things up would be inefficient + $img->attr_transform_pre[] = + $img->attr_transform_post[] = + new HTMLPurifier_AttrTransform_ImgRequired(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Legacy.php b/library/HTMLPurifier/HTMLModule/Legacy.php new file mode 100644 index 000000000..df33927ba --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Legacy.php @@ -0,0 +1,143 @@ +<?php + +/** + * XHTML 1.1 Legacy module defines elements that were previously + * deprecated. + * + * @note Not all legacy elements have been implemented yet, which + * is a bit of a reverse problem as compared to browsers! In + * addition, this legacy module may implement a bit more than + * mandated by XHTML 1.1. + * + * This module can be used in combination with TransformToStrict in order + * to transform as many deprecated elements as possible, but retain + * questionably deprecated elements that do not have good alternatives + * as well as transform elements that don't have an implementation. + * See docs/ref-strictness.txt for more details. + */ + +class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule +{ + + public $name = 'Legacy'; + + public function setup($config) { + + $this->addElement('basefont', 'Inline', 'Empty', false, array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + 'id' => 'ID' + )); + $this->addElement('center', 'Block', 'Flow', 'Common'); + $this->addElement('dir', 'Block', 'Required: li', 'Common', array( + 'compact' => 'Bool#compact' + )); + $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + )); + $this->addElement('menu', 'Block', 'Required: li', 'Common', array( + 'compact' => 'Bool#compact' + )); + + $s = $this->addElement('s', 'Inline', 'Inline', 'Common'); + $s->formatting = true; + + $strike = $this->addElement('strike', 'Inline', 'Inline', 'Common'); + $strike->formatting = true; + + $u = $this->addElement('u', 'Inline', 'Inline', 'Common'); + $u->formatting = true; + + // setup modifications to old elements + + $align = 'Enum#left,right,center,justify'; + + $address = $this->addBlankElement('address'); + $address->content_model = 'Inline | #PCDATA | p'; + $address->content_model_type = 'optional'; + $address->child = false; + + $blockquote = $this->addBlankElement('blockquote'); + $blockquote->content_model = 'Flow | #PCDATA'; + $blockquote->content_model_type = 'optional'; + $blockquote->child = false; + + $br = $this->addBlankElement('br'); + $br->attr['clear'] = 'Enum#left,all,right,none'; + + $caption = $this->addBlankElement('caption'); + $caption->attr['align'] = 'Enum#top,bottom,left,right'; + + $div = $this->addBlankElement('div'); + $div->attr['align'] = $align; + + $dl = $this->addBlankElement('dl'); + $dl->attr['compact'] = 'Bool#compact'; + + for ($i = 1; $i <= 6; $i++) { + $h = $this->addBlankElement("h$i"); + $h->attr['align'] = $align; + } + + $hr = $this->addBlankElement('hr'); + $hr->attr['align'] = $align; + $hr->attr['noshade'] = 'Bool#noshade'; + $hr->attr['size'] = 'Pixels'; + $hr->attr['width'] = 'Length'; + + $img = $this->addBlankElement('img'); + $img->attr['align'] = 'Enum#top,middle,bottom,left,right'; + $img->attr['border'] = 'Pixels'; + $img->attr['hspace'] = 'Pixels'; + $img->attr['vspace'] = 'Pixels'; + + // figure out this integer business + + $li = $this->addBlankElement('li'); + $li->attr['value'] = new HTMLPurifier_AttrDef_Integer(); + $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle'; + + $ol = $this->addBlankElement('ol'); + $ol->attr['compact'] = 'Bool#compact'; + $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + $ol->attr['type'] = 'Enum#s:1,i,I,a,A'; + + $p = $this->addBlankElement('p'); + $p->attr['align'] = $align; + + $pre = $this->addBlankElement('pre'); + $pre->attr['width'] = 'Number'; + + // script omitted + + $table = $this->addBlankElement('table'); + $table->attr['align'] = 'Enum#left,center,right'; + $table->attr['bgcolor'] = 'Color'; + + $tr = $this->addBlankElement('tr'); + $tr->attr['bgcolor'] = 'Color'; + + $th = $this->addBlankElement('th'); + $th->attr['bgcolor'] = 'Color'; + $th->attr['height'] = 'Length'; + $th->attr['nowrap'] = 'Bool#nowrap'; + $th->attr['width'] = 'Length'; + + $td = $this->addBlankElement('td'); + $td->attr['bgcolor'] = 'Color'; + $td->attr['height'] = 'Length'; + $td->attr['nowrap'] = 'Bool#nowrap'; + $td->attr['width'] = 'Length'; + + $ul = $this->addBlankElement('ul'); + $ul->attr['compact'] = 'Bool#compact'; + $ul->attr['type'] = 'Enum#square,disc,circle'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/List.php b/library/HTMLPurifier/HTMLModule/List.php new file mode 100644 index 000000000..74d4522f4 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/List.php @@ -0,0 +1,37 @@ +<?php + +/** + * XHTML 1.1 List Module, defines list-oriented elements. Core Module. + */ +class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule +{ + + public $name = 'List'; + + // According to the abstract schema, the List content set is a fully formed + // one or more expr, but it invariably occurs in an optional declaration + // so we're not going to do that subtlety. It might cause trouble + // if a user defines "List" and expects that multiple lists are + // allowed to be specified, but then again, that's not very intuitive. + // Furthermore, the actual XML Schema may disagree. Regardless, + // we don't have support for such nested expressions without using + // the incredibly inefficient and draconic Custom ChildDef. + + public $content_sets = array('Flow' => 'List'); + + public function setup($config) { + $ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); + $ol->wrap = "li"; + $ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); + $ul->wrap = "li"; + $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); + + $this->addElement('li', false, 'Flow', 'Common'); + + $this->addElement('dd', false, 'Flow', 'Common'); + $this->addElement('dt', false, 'Inline', 'Common'); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Name.php b/library/HTMLPurifier/HTMLModule/Name.php new file mode 100644 index 000000000..05694b450 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Name.php @@ -0,0 +1,21 @@ +<?php + +class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule +{ + + public $name = 'Name'; + + public function setup($config) { + $elements = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map'); + foreach ($elements as $name) { + $element = $this->addBlankElement($name); + $element->attr['name'] = 'CDATA'; + if (!$config->get('HTML.Attr.Name.UseCDATA')) { + $element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync(); + } + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php b/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php new file mode 100644 index 000000000..5f1b14abb --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php @@ -0,0 +1,14 @@ +<?php + +class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule +{ + public $name = 'NonXMLCommonAttributes'; + + public $attr_collections = array( + 'Lang' => array( + 'lang' => 'LanguageCode', + ) + ); +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Object.php b/library/HTMLPurifier/HTMLModule/Object.php new file mode 100644 index 000000000..193c1011f --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Object.php @@ -0,0 +1,47 @@ +<?php + +/** + * XHTML 1.1 Object Module, defines elements for generic object inclusion + * @warning Users will commonly use <embed> to cater to legacy browsers: this + * module does not allow this sort of behavior + */ +class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule +{ + + public $name = 'Object'; + public $safe = false; + + public function setup($config) { + + $this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', + array( + 'archive' => 'URI', + 'classid' => 'URI', + 'codebase' => 'URI', + 'codetype' => 'Text', + 'data' => 'URI', + 'declare' => 'Bool#declare', + 'height' => 'Length', + 'name' => 'CDATA', + 'standby' => 'Text', + 'tabindex' => 'Number', + 'type' => 'ContentType', + 'width' => 'Length' + ) + ); + + $this->addElement('param', false, 'Empty', false, + array( + 'id' => 'ID', + 'name*' => 'Text', + 'type' => 'Text', + 'value' => 'Text', + 'valuetype' => 'Enum#data,ref,object' + ) + ); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Presentation.php b/library/HTMLPurifier/HTMLModule/Presentation.php new file mode 100644 index 000000000..8ff0b5ed7 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Presentation.php @@ -0,0 +1,36 @@ +<?php + +/** + * XHTML 1.1 Presentation Module, defines simple presentation-related + * markup. Text Extension Module. + * @note The official XML Schema and DTD specs further divide this into + * two modules: + * - Block Presentation (hr) + * - Inline Presentation (b, big, i, small, sub, sup, tt) + * We have chosen not to heed this distinction, as content_sets + * provides satisfactory disambiguation. + */ +class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule +{ + + public $name = 'Presentation'; + + public function setup($config) { + $this->addElement('hr', 'Block', 'Empty', 'Common'); + $this->addElement('sub', 'Inline', 'Inline', 'Common'); + $this->addElement('sup', 'Inline', 'Inline', 'Common'); + $b = $this->addElement('b', 'Inline', 'Inline', 'Common'); + $b->formatting = true; + $big = $this->addElement('big', 'Inline', 'Inline', 'Common'); + $big->formatting = true; + $i = $this->addElement('i', 'Inline', 'Inline', 'Common'); + $i->formatting = true; + $small = $this->addElement('small', 'Inline', 'Inline', 'Common'); + $small->formatting = true; + $tt = $this->addElement('tt', 'Inline', 'Inline', 'Common'); + $tt->formatting = true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Proprietary.php b/library/HTMLPurifier/HTMLModule/Proprietary.php new file mode 100644 index 000000000..dd36a3de0 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Proprietary.php @@ -0,0 +1,33 @@ +<?php + +/** + * Module defines proprietary tags and attributes in HTML. + * @warning If this module is enabled, standards-compliance is off! + */ +class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule +{ + + public $name = 'Proprietary'; + + public function setup($config) { + + $this->addElement('marquee', 'Inline', 'Flow', 'Common', + array( + 'direction' => 'Enum#left,right,up,down', + 'behavior' => 'Enum#alternate', + 'width' => 'Length', + 'height' => 'Length', + 'scrolldelay' => 'Number', + 'scrollamount' => 'Number', + 'loop' => 'Number', + 'bgcolor' => 'Color', + 'hspace' => 'Pixels', + 'vspace' => 'Pixels', + ) + ); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Ruby.php b/library/HTMLPurifier/HTMLModule/Ruby.php new file mode 100644 index 000000000..b26a0a30a --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Ruby.php @@ -0,0 +1,27 @@ +<?php + +/** + * XHTML 1.1 Ruby Annotation Module, defines elements that indicate + * short runs of text alongside base text for annotation or pronounciation. + */ +class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule +{ + + public $name = 'Ruby'; + + public function setup($config) { + $this->addElement('ruby', 'Inline', + 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))', + 'Common'); + $this->addElement('rbc', false, 'Required: rb', 'Common'); + $this->addElement('rtc', false, 'Required: rt', 'Common'); + $rb = $this->addElement('rb', false, 'Inline', 'Common'); + $rb->excludes = array('ruby' => true); + $rt = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number')); + $rt->excludes = array('ruby' => true); + $this->addElement('rp', false, 'Optional: #PCDATA', 'Common'); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/SafeEmbed.php b/library/HTMLPurifier/HTMLModule/SafeEmbed.php new file mode 100644 index 000000000..ea256716b --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/SafeEmbed.php @@ -0,0 +1,34 @@ +<?php + +/** + * A "safe" embed module. See SafeObject. This is a proprietary element. + */ +class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule +{ + + public $name = 'SafeEmbed'; + + public function setup($config) { + + $max = $config->get('HTML.MaxImgLength'); + $embed = $this->addElement( + 'embed', 'Inline', 'Empty', 'Common', + array( + 'src*' => 'URI#embedded', + 'type' => 'Enum#application/x-shockwave-flash', + 'width' => 'Pixels#' . $max, + 'height' => 'Pixels#' . $max, + 'allowscriptaccess' => 'Enum#never', + 'allownetworking' => 'Enum#internal', + 'flashvars' => 'Text', + 'wmode' => 'Enum#window', + 'name' => 'ID', + ) + ); + $embed->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed(); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/SafeObject.php b/library/HTMLPurifier/HTMLModule/SafeObject.php new file mode 100644 index 000000000..64ab8c070 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/SafeObject.php @@ -0,0 +1,53 @@ +<?php + +/** + * A "safe" object module. In theory, objects permitted by this module will + * be safe, and untrusted users can be allowed to embed arbitrary flash objects + * (maybe other types too, but only Flash is supported as of right now). + * Highly experimental. + */ +class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule +{ + + public $name = 'SafeObject'; + + public function setup($config) { + + // These definitions are not intrinsically safe: the attribute transforms + // are a vital part of ensuring safety. + + $max = $config->get('HTML.MaxImgLength'); + $object = $this->addElement( + 'object', + 'Inline', + 'Optional: param | Flow | #PCDATA', + 'Common', + array( + // While technically not required by the spec, we're forcing + // it to this value. + 'type' => 'Enum#application/x-shockwave-flash', + 'width' => 'Pixels#' . $max, + 'height' => 'Pixels#' . $max, + 'data' => 'URI#embedded', + 'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000', + 'codebase' => new HTMLPurifier_AttrDef_Enum(array( + 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')), + ) + ); + $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); + + $param = $this->addElement('param', false, 'Empty', false, + array( + 'id' => 'ID', + 'name*' => 'Text', + 'value' => 'Text' + ) + ); + $param->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam(); + $this->info_injector[] = 'SafeObject'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Scripting.php b/library/HTMLPurifier/HTMLModule/Scripting.php new file mode 100644 index 000000000..cecdea6c3 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Scripting.php @@ -0,0 +1,54 @@ +<?php + +/* + +WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING +INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!! + +*/ + +/** + * XHTML 1.1 Scripting module, defines elements that are used to contain + * information pertaining to executable scripts or the lack of support + * for executable scripts. + * @note This module does not contain inline scripting elements + */ +class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule +{ + public $name = 'Scripting'; + public $elements = array('script', 'noscript'); + public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript'); + public $safe = false; + + public function setup($config) { + // TODO: create custom child-definition for noscript that + // auto-wraps stray #PCDATA in a similar manner to + // blockquote's custom definition (we would use it but + // blockquote's contents are optional while noscript's contents + // are required) + + // TODO: convert this to new syntax, main problem is getting + // both content sets working + + // In theory, this could be safe, but I don't see any reason to + // allow it. + $this->info['noscript'] = new HTMLPurifier_ElementDef(); + $this->info['noscript']->attr = array( 0 => array('Common') ); + $this->info['noscript']->content_model = 'Heading | List | Block'; + $this->info['noscript']->content_model_type = 'required'; + + $this->info['script'] = new HTMLPurifier_ElementDef(); + $this->info['script']->attr = array( + 'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')), + 'src' => new HTMLPurifier_AttrDef_URI(true), + 'type' => new HTMLPurifier_AttrDef_Enum(array('text/javascript')) + ); + $this->info['script']->content_model = '#PCDATA'; + $this->info['script']->content_model_type = 'optional'; + $this->info['script']->attr_transform_pre['type'] = + $this->info['script']->attr_transform_post['type'] = + new HTMLPurifier_AttrTransform_ScriptRequired(); + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/StyleAttribute.php b/library/HTMLPurifier/HTMLModule/StyleAttribute.php new file mode 100644 index 000000000..eb78464cc --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/StyleAttribute.php @@ -0,0 +1,24 @@ +<?php + +/** + * XHTML 1.1 Edit Module, defines editing-related elements. Text Extension + * Module. + */ +class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule +{ + + public $name = 'StyleAttribute'; + public $attr_collections = array( + // The inclusion routine differs from the Abstract Modules but + // is in line with the DTD and XML Schemas. + 'Style' => array('style' => false), // see constructor + 'Core' => array(0 => array('Style')) + ); + + public function setup($config) { + $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tables.php b/library/HTMLPurifier/HTMLModule/Tables.php new file mode 100644 index 000000000..f314ced3f --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tables.php @@ -0,0 +1,66 @@ +<?php + +/** + * XHTML 1.1 Tables Module, fully defines accessible table elements. + */ +class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule +{ + + public $name = 'Tables'; + + public function setup($config) { + + $this->addElement('caption', false, 'Inline', 'Common'); + + $this->addElement('table', 'Block', + new HTMLPurifier_ChildDef_Table(), 'Common', + array( + 'border' => 'Pixels', + 'cellpadding' => 'Length', + 'cellspacing' => 'Length', + 'frame' => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border', + 'rules' => 'Enum#none,groups,rows,cols,all', + 'summary' => 'Text', + 'width' => 'Length' + ) + ); + + // common attributes + $cell_align = array( + 'align' => 'Enum#left,center,right,justify,char', + 'charoff' => 'Length', + 'valign' => 'Enum#top,middle,bottom,baseline', + ); + + $cell_t = array_merge( + array( + 'abbr' => 'Text', + 'colspan' => 'Number', + 'rowspan' => 'Number', + ), + $cell_align + ); + $this->addElement('td', false, 'Flow', 'Common', $cell_t); + $this->addElement('th', false, 'Flow', 'Common', $cell_t); + + $this->addElement('tr', false, 'Required: td | th', 'Common', $cell_align); + + $cell_col = array_merge( + array( + 'span' => 'Number', + 'width' => 'MultiLength', + ), + $cell_align + ); + $this->addElement('col', false, 'Empty', 'Common', $cell_col); + $this->addElement('colgroup', false, 'Optional: col', 'Common', $cell_col); + + $this->addElement('tbody', false, 'Required: tr', 'Common', $cell_align); + $this->addElement('thead', false, 'Required: tr', 'Common', $cell_align); + $this->addElement('tfoot', false, 'Required: tr', 'Common', $cell_align); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Target.php b/library/HTMLPurifier/HTMLModule/Target.php new file mode 100644 index 000000000..2b844ecc4 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Target.php @@ -0,0 +1,23 @@ +<?php + +/** + * XHTML 1.1 Target Module, defines target attribute in link elements. + */ +class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule +{ + + public $name = 'Target'; + + public function setup($config) { + $elements = array('a'); + foreach ($elements as $name) { + $e = $this->addBlankElement($name); + $e->attr = array( + 'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget() + ); + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php new file mode 100644 index 000000000..ae77c7188 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Text.php @@ -0,0 +1,71 @@ +<?php + +/** + * XHTML 1.1 Text Module, defines basic text containers. Core Module. + * @note In the normative XML Schema specification, this module + * is further abstracted into the following modules: + * - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6) + * - Block Structural (div, p) + * - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var) + * - Inline Structural (br, span) + * This module, functionally, does not distinguish between these + * sub-modules, but the code is internally structured to reflect + * these distinctions. + */ +class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule +{ + + public $name = 'Text'; + public $content_sets = array( + 'Flow' => 'Heading | Block | Inline' + ); + + public function setup($config) { + + // Inline Phrasal ------------------------------------------------- + $this->addElement('abbr', 'Inline', 'Inline', 'Common'); + $this->addElement('acronym', 'Inline', 'Inline', 'Common'); + $this->addElement('cite', 'Inline', 'Inline', 'Common'); + $this->addElement('dfn', 'Inline', 'Inline', 'Common'); + $this->addElement('kbd', 'Inline', 'Inline', 'Common'); + $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI')); + $this->addElement('samp', 'Inline', 'Inline', 'Common'); + $this->addElement('var', 'Inline', 'Inline', 'Common'); + + $em = $this->addElement('em', 'Inline', 'Inline', 'Common'); + $em->formatting = true; + + $strong = $this->addElement('strong', 'Inline', 'Inline', 'Common'); + $strong->formatting = true; + + $code = $this->addElement('code', 'Inline', 'Inline', 'Common'); + $code->formatting = true; + + // Inline Structural ---------------------------------------------- + $this->addElement('span', 'Inline', 'Inline', 'Common'); + $this->addElement('br', 'Inline', 'Empty', 'Core'); + + // Block Phrasal -------------------------------------------------- + $this->addElement('address', 'Block', 'Inline', 'Common'); + $this->addElement('blockquote', 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') ); + $pre = $this->addElement('pre', 'Block', 'Inline', 'Common'); + $pre->excludes = $this->makeLookup( + 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' ); + $this->addElement('h1', 'Heading', 'Inline', 'Common'); + $this->addElement('h2', 'Heading', 'Inline', 'Common'); + $this->addElement('h3', 'Heading', 'Inline', 'Common'); + $this->addElement('h4', 'Heading', 'Inline', 'Common'); + $this->addElement('h5', 'Heading', 'Inline', 'Common'); + $this->addElement('h6', 'Heading', 'Inline', 'Common'); + + // Block Structural ----------------------------------------------- + $p = $this->addElement('p', 'Block', 'Inline', 'Common'); + $p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", "dl", "fieldset", "ol", "p", "ul")); + + $this->addElement('div', 'Block', 'Flow', 'Common'); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy.php b/library/HTMLPurifier/HTMLModule/Tidy.php new file mode 100644 index 000000000..21783f18e --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy.php @@ -0,0 +1,207 @@ +<?php + +/** + * Abstract class for a set of proprietary modules that clean up (tidy) + * poorly written HTML. + * @todo Figure out how to protect some of these methods/properties + */ +class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule +{ + + /** + * List of supported levels. Index zero is a special case "no fixes" + * level. + */ + public $levels = array(0 => 'none', 'light', 'medium', 'heavy'); + + /** + * Default level to place all fixes in. Disabled by default + */ + public $defaultLevel = null; + + /** + * Lists of fixes used by getFixesForLevel(). Format is: + * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2'); + */ + public $fixesForLevel = array( + 'light' => array(), + 'medium' => array(), + 'heavy' => array() + ); + + /** + * Lazy load constructs the module by determining the necessary + * fixes to create and then delegating to the populate() function. + * @todo Wildcard matching and error reporting when an added or + * subtracted fix has no effect. + */ + public function setup($config) { + + // create fixes, initialize fixesForLevel + $fixes = $this->makeFixes(); + $this->makeFixesForLevel($fixes); + + // figure out which fixes to use + $level = $config->get('HTML.TidyLevel'); + $fixes_lookup = $this->getFixesForLevel($level); + + // get custom fix declarations: these need namespace processing + $add_fixes = $config->get('HTML.TidyAdd'); + $remove_fixes = $config->get('HTML.TidyRemove'); + + foreach ($fixes as $name => $fix) { + // needs to be refactored a little to implement globbing + if ( + isset($remove_fixes[$name]) || + (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name])) + ) { + unset($fixes[$name]); + } + } + + // populate this module with necessary fixes + $this->populate($fixes); + + } + + /** + * Retrieves all fixes per a level, returning fixes for that specific + * level as well as all levels below it. + * @param $level String level identifier, see $levels for valid values + * @return Lookup up table of fixes + */ + public function getFixesForLevel($level) { + if ($level == $this->levels[0]) { + return array(); + } + $activated_levels = array(); + for ($i = 1, $c = count($this->levels); $i < $c; $i++) { + $activated_levels[] = $this->levels[$i]; + if ($this->levels[$i] == $level) break; + } + if ($i == $c) { + trigger_error( + 'Tidy level ' . htmlspecialchars($level) . ' not recognized', + E_USER_WARNING + ); + return array(); + } + $ret = array(); + foreach ($activated_levels as $level) { + foreach ($this->fixesForLevel[$level] as $fix) { + $ret[$fix] = true; + } + } + return $ret; + } + + /** + * Dynamically populates the $fixesForLevel member variable using + * the fixes array. It may be custom overloaded, used in conjunction + * with $defaultLevel, or not used at all. + */ + public function makeFixesForLevel($fixes) { + if (!isset($this->defaultLevel)) return; + if (!isset($this->fixesForLevel[$this->defaultLevel])) { + trigger_error( + 'Default level ' . $this->defaultLevel . ' does not exist', + E_USER_ERROR + ); + return; + } + $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes); + } + + /** + * Populates the module with transforms and other special-case code + * based on a list of fixes passed to it + * @param $lookup Lookup table of fixes to activate + */ + public function populate($fixes) { + foreach ($fixes as $name => $fix) { + // determine what the fix is for + list($type, $params) = $this->getFixType($name); + switch ($type) { + case 'attr_transform_pre': + case 'attr_transform_post': + $attr = $params['attr']; + if (isset($params['element'])) { + $element = $params['element']; + if (empty($this->info[$element])) { + $e = $this->addBlankElement($element); + } else { + $e = $this->info[$element]; + } + } else { + $type = "info_$type"; + $e = $this; + } + // PHP does some weird parsing when I do + // $e->$type[$attr], so I have to assign a ref. + $f =& $e->$type; + $f[$attr] = $fix; + break; + case 'tag_transform': + $this->info_tag_transform[$params['element']] = $fix; + break; + case 'child': + case 'content_model_type': + $element = $params['element']; + if (empty($this->info[$element])) { + $e = $this->addBlankElement($element); + } else { + $e = $this->info[$element]; + } + $e->$type = $fix; + break; + default: + trigger_error("Fix type $type not supported", E_USER_ERROR); + break; + } + } + } + + /** + * Parses a fix name and determines what kind of fix it is, as well + * as other information defined by the fix + * @param $name String name of fix + * @return array(string $fix_type, array $fix_parameters) + * @note $fix_parameters is type dependant, see populate() for usage + * of these parameters + */ + public function getFixType($name) { + // parse it + $property = $attr = null; + if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name); + if (strpos($name, '@') !== false) list($name, $attr) = explode('@', $name); + + // figure out the parameters + $params = array(); + if ($name !== '') $params['element'] = $name; + if (!is_null($attr)) $params['attr'] = $attr; + + // special case: attribute transform + if (!is_null($attr)) { + if (is_null($property)) $property = 'pre'; + $type = 'attr_transform_' . $property; + return array($type, $params); + } + + // special case: tag transform + if (is_null($property)) { + return array('tag_transform', $params); + } + + return array($property, $params); + + } + + /** + * Defines all fixes the module will perform in a compact + * associative array of fix name to fix implementation. + */ + public function makeFixes() {} + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/Name.php b/library/HTMLPurifier/HTMLModule/Tidy/Name.php new file mode 100644 index 000000000..61ff85ce2 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/Name.php @@ -0,0 +1,24 @@ +<?php + +/** + * Name is deprecated, but allowed in strict doctypes, so onl + */ +class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy +{ + public $name = 'Tidy_Name'; + public $defaultLevel = 'heavy'; + public function makeFixes() { + + $r = array(); + + // @name for img, a ----------------------------------------------- + // Technically, it's allowed even on strict, so we allow authors to use + // it. However, it's deprecated in future versions of XHTML. + $r['img@name'] = + $r['a@name'] = new HTMLPurifier_AttrTransform_Name(); + + return $r; + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php b/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php new file mode 100644 index 000000000..14c15c4a0 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php @@ -0,0 +1,24 @@ +<?php + +class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy +{ + + public $name = 'Tidy_Proprietary'; + public $defaultLevel = 'light'; + + public function makeFixes() { + $r = array(); + $r['table@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['td@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['th@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['tr@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['thead@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['tbody@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['table@height'] = new HTMLPurifier_AttrTransform_Length('height'); + return $r; + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/Strict.php b/library/HTMLPurifier/HTMLModule/Tidy/Strict.php new file mode 100644 index 000000000..c73dc3c4d --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/Strict.php @@ -0,0 +1,21 @@ +<?php + +class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 +{ + public $name = 'Tidy_Strict'; + public $defaultLevel = 'light'; + + public function makeFixes() { + $r = parent::makeFixes(); + $r['blockquote#content_model_type'] = 'strictblockquote'; + return $r; + } + + public $defines_child_def = true; + public function getChildDef($def) { + if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def); + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/Transitional.php b/library/HTMLPurifier/HTMLModule/Tidy/Transitional.php new file mode 100644 index 000000000..9960b1dd1 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/Transitional.php @@ -0,0 +1,9 @@ +<?php + +class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 +{ + public $name = 'Tidy_Transitional'; + public $defaultLevel = 'heavy'; +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php new file mode 100644 index 000000000..db5a378e5 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php @@ -0,0 +1,17 @@ +<?php + +class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy +{ + + public $name = 'Tidy_XHTML'; + public $defaultLevel = 'medium'; + + public function makeFixes() { + $r = array(); + $r['@lang'] = new HTMLPurifier_AttrTransform_Lang(); + return $r; + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php new file mode 100644 index 000000000..02e943813 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php @@ -0,0 +1,161 @@ +<?php + +class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy +{ + + public function makeFixes() { + + $r = array(); + + // == deprecated tag transforms =================================== + + $r['font'] = new HTMLPurifier_TagTransform_Font(); + $r['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); + $r['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); + $r['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;'); + $r['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;'); + $r['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); + $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); + + // == deprecated attribute transforms ============================= + + $r['caption@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + // we're following IE's behavior, not Firefox's, due + // to the fact that no one supports caption-side:right, + // W3C included (with CSS 2.1). This is a slightly + // unreasonable attribute! + 'left' => 'text-align:left;', + 'right' => 'text-align:right;', + 'top' => 'caption-side:top;', + 'bottom' => 'caption-side:bottom;' // not supported by IE + )); + + // @align for img ------------------------------------------------- + $r['img@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'right' => 'float:right;', + 'top' => 'vertical-align:top;', + 'middle' => 'vertical-align:middle;', + 'bottom' => 'vertical-align:baseline;', + )); + + // @align for table ----------------------------------------------- + $r['table@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'center' => 'margin-left:auto;margin-right:auto;', + 'right' => 'float:right;' + )); + + // @align for hr ----------------------------------------------- + $r['hr@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + // we use both text-align and margin because these work + // for different browsers (IE and Firefox, respectively) + // and the melange makes for a pretty cross-compatible + // solution + 'left' => 'margin-left:0;margin-right:auto;text-align:left;', + 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', + 'right' => 'margin-left:auto;margin-right:0;text-align:right;' + )); + + // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- + // {{{ + $align_lookup = array(); + $align_values = array('left', 'right', 'center', 'justify'); + foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; + // }}} + $r['h1@align'] = + $r['h2@align'] = + $r['h3@align'] = + $r['h4@align'] = + $r['h5@align'] = + $r['h6@align'] = + $r['p@align'] = + $r['div@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); + + // @bgcolor for table, tr, td, th --------------------------------- + $r['table@bgcolor'] = + $r['td@bgcolor'] = + $r['th@bgcolor'] = + new HTMLPurifier_AttrTransform_BgColor(); + + // @border for img ------------------------------------------------ + $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); + + // @clear for br -------------------------------------------------- + $r['br@clear'] = + new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( + 'left' => 'clear:left;', + 'right' => 'clear:right;', + 'all' => 'clear:both;', + 'none' => 'clear:none;', + )); + + // @height for td, th --------------------------------------------- + $r['td@height'] = + $r['th@height'] = + new HTMLPurifier_AttrTransform_Length('height'); + + // @hspace for img ------------------------------------------------ + $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); + + // @noshade for hr ------------------------------------------------ + // this transformation is not precise but often good enough. + // different browsers use different styles to designate noshade + $r['hr@noshade'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'noshade', + 'color:#808080;background-color:#808080;border:0;' + ); + + // @nowrap for td, th --------------------------------------------- + $r['td@nowrap'] = + $r['th@nowrap'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'nowrap', + 'white-space:nowrap;' + ); + + // @size for hr -------------------------------------------------- + $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); + + // @type for li, ol, ul ------------------------------------------- + // {{{ + $ul_types = array( + 'disc' => 'list-style-type:disc;', + 'square' => 'list-style-type:square;', + 'circle' => 'list-style-type:circle;' + ); + $ol_types = array( + '1' => 'list-style-type:decimal;', + 'i' => 'list-style-type:lower-roman;', + 'I' => 'list-style-type:upper-roman;', + 'a' => 'list-style-type:lower-alpha;', + 'A' => 'list-style-type:upper-alpha;' + ); + $li_types = $ul_types + $ol_types; + // }}} + + $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); + $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); + $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); + + // @vspace for img ------------------------------------------------ + $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); + + // @width for hr, td, th ------------------------------------------ + $r['td@width'] = + $r['th@width'] = + $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); + + return $r; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php b/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php new file mode 100644 index 000000000..9c0e03198 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php @@ -0,0 +1,14 @@ +<?php + +class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule +{ + public $name = 'XMLCommonAttributes'; + + public $attr_collections = array( + 'Lang' => array( + 'xml:lang' => 'LanguageCode', + ) + ); +} + +// vim: et sw=4 sts=4 |