aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/HTMLModule.php
blob: 072cf68084aff232148e8416ffdfda993d89011d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
<?php

/**
 * Represents an XHTML 1.1 module, with information on elements, tags
 * and attributes.
 * @note Even though this is technically XHTML 1.1, it is also used for
 *       regular HTML parsing. We are using modulization as a convenient
 *       way to represent the internals of HTMLDefinition, and our
 *       implementation is by no means conforming and does not directly
 *       use the normative DTDs or XML schemas.
 * @note The public variables in a module should almost directly
 *       correspond to the variables in HTMLPurifier_HTMLDefinition.
 *       However, the prefix info carries no special meaning in these
 *       objects (include it anyway if that's the correspondence though).
 * @todo Consider making some member functions protected
 */

class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module
     */
    public $name;

    /**
     * Informally, a list of elements this module changes. Not used in
     * any significant way.
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of it's pre-requisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     * @param $def HTMLPurifier_ElementDef instance
     * @return HTMLPurifier_ChildDef subclass
     */
    public function getChildDef($def) {return false;}

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param $element Name of element to add
     * @param $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param $attr_includes What attribute collections to register to
     *              element?
     * @param $attr What unique attributes does the element define?
     * @note See ElementDef for in-depth descriptions of these parameters.
     * @return Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) $this->addElementToContentSet($element, $type);
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model, $content_model_type, $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) $this->info[$element]->child = $contents;
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element.
     * @param $element Name of element to create
     * @return Created element
     */
    public function addBlankElement($element) {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set
     * @param Element to register
     * @param Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type) {
        if (!isset($this->content_sets[$type])) $this->content_sets[$type] = '';
        else $this->content_sets[$type] .= ' | ';
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the callee needs to take the original $contents
     *       and use it directly.
     */
    public function parseContents($contents) {
        if (!is_string($contents)) return array(null, null); // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        list($content_model_type, $content_model) = explode(':', $contents);
        $content_model_type = strtolower(trim($content_model_type));
        $content_model = trim($content_model);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array.
     * @param $attr Reference to attr array to modify
     * @param $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes) {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) $attr_includes = array();
            else $attr_includes = array($attr_includes);
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @return Lookup array equivalent of list
     */
    public function makeLookup($list) {
        if (is_string($list)) $list = func_get_args();
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) continue;
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available.
     * @param $config Instance of HTMLPurifier_Config
     */
    public function setup($config) {}

}

// vim: et sw=4 sts=4