diff options
Diffstat (limited to 'actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb')
-rw-r--r-- | actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb index 51e0868995..09dd08898c 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb @@ -1,5 +1,5 @@ require 'set' -require 'active_support/core_ext/class/inheritable_attributes' +require 'active_support/core_ext/class/attribute' module HTML class Sanitizer @@ -7,11 +7,11 @@ module HTML return text unless sanitizeable?(text) tokenize(text, options).join end - + def sanitizeable?(text) !(text.nil? || text.empty? || !text.index("<")) end - + protected def tokenize(text, options) tokenizer = HTML::Tokenizer.new(text) @@ -22,12 +22,12 @@ module HTML end result end - + def process_node(node, result, options) result << node.to_s end end - + class FullSanitizer < Sanitizer def sanitize(text, options = {}) result = super @@ -37,12 +37,12 @@ module HTML # Recurse - handle all dirty nested tags result == text ? result : sanitize(result, options) end - + def process_node(node, result, options) result << node.to_s if node.class == HTML::Text end end - + class LinkSanitizer < FullSanitizer cattr_accessor :included_tags, :instance_writer => false self.included_tags = Set.new(%w(a href)) @@ -50,51 +50,51 @@ module HTML def sanitizeable?(text) !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">"))) end - + protected def process_node(node, result, options) - result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) + result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) end end - + class WhiteListSanitizer < Sanitizer [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| - class_inheritable_accessor attr, :instance_writer => false + class_attribute attr, :instance_writer => false end # A regular expression of the valid characters used to separate protocols like # the ':' in 'http://foo.com' self.protocol_separator = /:|(�*58)|(p)|(%|%)3A/ - + # Specifies a Set of HTML attributes that can have URIs. self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed # to just escaping harmless tags like <font> self.bad_tags = Set.new(%w(script)) - + # Specifies the default Set of tags that the #sanitize helper will allow unscathed. - self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr + self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr acronym a img blockquote del ins)) - # Specifies the default Set of html attributes that the #sanitize helper will leave + # Specifies the default Set of html attributes that the #sanitize helper will leave # in the allowed tag. self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) - + # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto + self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) - + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. - self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse - border-color border-left-color border-right-color border-top-color clear color cursor direction display + self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse + border-color border-left-color border-right-color border-top-color clear color cursor direction display elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space width)) - + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal @@ -118,9 +118,9 @@ module HTML style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| if allowed_css_properties.include?(prop.downcase) clean << prop + ': ' + val + ';' - elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) + elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) unless val.split().any? do |keyword| - !allowed_css_keywords.include?(keyword) && + !allowed_css_keywords.include?(keyword) && keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ end clean << prop + ': ' + val + ';' @@ -146,7 +146,7 @@ module HTML else options[:parent].unshift node.name end - + process_attributes_for node, options options[:tags].include?(node.name) ? node : nil @@ -154,7 +154,7 @@ module HTML bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "<") end end - + def process_attributes_for(node, options) return unless node.attributes node.attributes.keys.each do |attr_name| @@ -169,8 +169,8 @@ module HTML end def contains_bad_protocols?(attr_name, value) - uri_attributes.include?(attr_name) && - (value =~ /(^[^\/:]*):|(�*58)|(p)|(%|%)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first)) + uri_attributes.include?(attr_name) && + (value =~ /(^[^\/:]*):|(�*58)|(p)|(%|%)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first.downcase)) end end end |