aboutsummaryrefslogblamecommitdiffstats
path: root/library/HTMLPurifier/HTMLModule/Tidy.php
blob: 08aa23247032afd76e6754aa2cf7e76a0a17f47c (plain) (tree)
1
2
3
4
5
6
7
8
9








                                                                       
       


                                                     



                                                                    


                                           



                                

                                                 
                                                                             
                  

                                  
                           
                            
                          




                                                                      
                                         


                                                                   

                                  








                                                                       
                                                  



                                                                    

                                                                              





                                                    




                                                                         

                                                                          
       

                                            





                                                                


                                              




















                                                                             
                          
       




                                             












                                                                           
                                                            
       

                                    


















































                                                                                

                                     

                                 





                                                         


                                    





                                       


                                            


                                     















                                                             
                    
       


                               


                     
<?php

/**
 * Abstract class for a set of proprietary modules that clean up (tidy)
 * poorly written HTML.
 *
 * Subclasses describe their fixes by overriding makeFixes(). setup() then
 * filters those fixes using the HTML.TidyLevel, HTML.TidyAdd and
 * HTML.TidyRemove configuration directives and installs the survivors
 * through populate().
 * @todo Figure out how to protect some of these methods/properties
 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
    /**
     * List of supported levels, ordered from least to most aggressive.
     * Index zero is a special case "no fixes" level.
     * @type array
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in.
     * Disabled (null) by default, in which case makeFixesForLevel()
     * leaves $fixesForLevel untouched.
     * @type string|null
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel().
     * Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     * @type array
     */
    public $fixesForLevel = array(
        'light' => array(),
        'medium' => array(),
        'heavy' => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix survives when it is not listed in HTML.TidyRemove and is
     * either listed in HTML.TidyAdd or activated by HTML.TidyLevel.
     * @param HTMLPurifier_Config $config
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config)
    {
        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        if ($fixes === null) {
            // tolerate overrides that fall off the end without returning,
            // so the array operations below never receive null
            $fixes = array();
        }
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        $add_fixes = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            $removed = isset($remove_fixes[$name]);
            $requested = isset($add_fixes[$name]) || isset($fixes_lookup[$name]);
            if ($removed || !$requested) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);
    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * The "no fixes" level ($levels[0]) yields an empty array. An
     * unrecognized level raises an E_USER_WARNING and also yields an
     * empty array.
     * @param string $level level identifier, see $levels for valid values
     * @return array Lookup table of fixes (fix name => true)
     */
    public function getFixesForLevel($level)
    {
        if ($level == $this->levels[0]) {
            return array();
        }
        // collect every level up to and including the requested one
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                break;
            }
        }
        // the loop ran to completion without a match
        if ($i == $c) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }
        $ret = array();
        foreach ($activated_levels as $activated) {
            // a level may legitimately have no fix list registered
            if (empty($this->fixesForLevel[$activated])) {
                continue;
            }
            foreach ($this->fixesForLevel[$activated] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * When $defaultLevel is set, every fix name is assigned to that
     * level, replacing whatever list was there. An unknown
     * $defaultLevel raises an E_USER_ERROR.
     * @param array $fixes Associative array of fix name to implementation
     */
    public function makeFixesForLevel($fixes)
    {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it.
     *
     * Each fix name is decoded with getFixType() and stored accordingly:
     * attribute transforms go on the named element (created blank if
     * missing) or, without an element, on this module's
     * info_attr_transform_pre/post; tag transforms go in
     * info_tag_transform; 'child' and 'content_model_type' are set on
     * the named element. Any other type raises an E_USER_ERROR.
     * @param array $fixes Lookup table of fixes to activate
     */
    public function populate($fixes)
    {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // global attribute: the transform lives on the module
                        $type = "info_$type";
                        $e = $this;
                    }
                    // braces make this parse as ($e->$type)[$attr] on every
                    // PHP version, without binding the property to a reference
                    $e->{$type}[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix.
     *
     * Names take the form "element@attr#property", where every part is
     * optional:
     *  - "font"          => array('tag_transform', array('element' => 'font'))
     *  - "img@align"     => attr_transform_pre on element img, attr align
     *  - "@lang"         => attr_transform_pre with no element
     *  - "td@width#post" => attr_transform_post on element td, attr width
     *  - "ul#child"      => array('child', array('element' => 'ul'))
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependent, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name)
    {
        // parse it: the property suffix is split off before the attribute
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            list($name, $property) = explode('#', $name);
        }
        if (strpos($name, '@') !== false) {
            list($name, $attr) = explode('@', $name);
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);
    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes; subclasses override it.
     * @return array
     */
    public function makeFixes()
    {
        // honour the documented return type so setup() can iterate it
        return array();
    }
}

// vim: et sw=4 sts=4