diff options
author | Rick Olson <technoweenie@gmail.com> | 2007-11-26 03:45:54 +0000 |
---|---|---|
committer | Rick Olson <technoweenie@gmail.com> | 2007-11-26 03:45:54 +0000 |
commit | 1af084ecda66a8e1b4eb3a51a07ebca85bf2e419 (patch) | |
tree | 2eb68b7cf6357726feb52653f480c099568d5da3 | |
parent | bd5ed651105663cb4bc5acd86ad8bdf48251d0fe (diff) | |
download | rails-1af084ecda66a8e1b4eb3a51a07ebca85bf2e419.tar.gz rails-1af084ecda66a8e1b4eb3a51a07ebca85bf2e419.tar.bz2 rails-1af084ecda66a8e1b4eb3a51a07ebca85bf2e419.zip |
Refactor sanitizer helpers into HTML classes and make it easy to swap them out with custom implementations. Closes #10129. [rick]
git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@8213 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
6 files changed, 475 insertions, 342 deletions
diff --git a/actionpack/CHANGELOG b/actionpack/CHANGELOG index f95a600c26..26b533b13c 100644 --- a/actionpack/CHANGELOG +++ b/actionpack/CHANGELOG @@ -1,5 +1,7 @@ *SVN* +* Refactor sanitizer helpers into HTML classes and make it easy to swap them out with custom implementations. Closes #10129. [rick] + * Add deprecation for old subtemplate syntax for ActionMailer templates, use render :partial [rick] * Fix TemplateError so it doesn't bomb on exceptions while running tests [rick] diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb index 329ab01560..607fd186b9 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb @@ -1,6 +1,7 @@ require 'html/tokenizer' require 'html/node' require 'html/selector' +require 'html/sanitizer' module HTML #:nodoc: # A top-level HTMl document. You give it a body of text, and it will parse that diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb new file mode 100644 index 0000000000..377e81aead --- /dev/null +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb @@ -0,0 +1,173 @@ +module HTML + class Sanitizer + def sanitize(text, options = {}) + return text unless sanitizeable?(text) + tokenize(text, options).join + end + + def sanitizeable?(text) + !(text.nil? || text.empty? 
|| !text.index("<")) + end + + protected + def tokenize(text, options) + tokenizer = HTML::Tokenizer.new(text) + result = [] + while token = tokenizer.next + node = Node.parse(nil, 0, 0, token, false) + process_node node, result, options + end + result + end + + def process_node(node, result, options) + result << node.to_s + end + end + + class FullSanitizer < Sanitizer + def sanitize(text, options = {}) + result = super + # strip any comments, and if they have a newline at the end (ie. line with + # only a comment) strip that too + result.gsub!(/<!--(.*?)-->[\n]?/m, "") if result + # Recurse - handle all dirty nested tags + result == text ? result : sanitize(result, options) + end + + def process_node(node, result, options) + result << node.to_s if node.class == HTML::Text + end + end + + class LinkSanitizer < FullSanitizer + cattr_accessor :included_tags, :instance_writer => false + self.included_tags = Set.new(%w(a href)) + + def sanitizeable?(text) + !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">"))) + end + + protected + def process_node(node, result, options) + result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) + end + end + + class WhiteListSanitizer < Sanitizer + [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, + :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| + class_inheritable_accessor attr, :instance_writer => false + end + + # A regular expression of the valid characters used to separate protocols like + # the ':' in 'http://foo.com' + self.protocol_separator = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/ + + # Specifies a Set of HTML attributes that can have URIs. 
+ self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) + + # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed + # to just escaping harmless tags like <font> + self.bad_tags = Set.new(%w(script)) + + # Specifies the default Set of tags that the #sanitize helper will allow unscathed. + self.allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr + acronym a img blockquote del ins fieldset legend)) + + # Specifies the default Set of html attributes that the #sanitize helper will leave + # in the allowed tag. + self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) + + # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. + self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto + feed svn urn aim rsync tag ssh sftp rtsp afs)) + + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. + self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse + border-color border-left-color border-right-color border-top-color clear color cursor direction display + elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height + overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation + speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space + width)) + + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. 
+ self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center + collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal + nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) + + # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. + self.shorthand_css_properties = Set.new(%w(background border margin padding)) + + # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute + def sanitize_css(style) + # disallow urls + style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') + + # gauntlet + if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ || + style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/ + return '' + end + + clean = [] + style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| + if allowed_css_properties.include?(prop.downcase) + clean << prop + ': ' + val + ';' + elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) + unless val.split().any? do |keyword| + !allowed_css_keywords.include?(keyword) && + keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ + end + clean << prop + ': ' + val + ';' + end + end + end + clean.join(' ') + end + + protected + def tokenize(text, options) + options[:parent] = [] + options[:attributes] ||= allowed_attributes + options[:tags] ||= allowed_tags + super + end + + def process_node(node, result, options) + result << case node + when HTML::Tag + if node.closing == :close + options[:parent].shift + else + options[:parent].unshift node.name + end + + process_attributes_for node, options + + options[:tags].include?(node.name) ? node : nil + else + bad_tags.include?(options[:parent].first) ? 
nil : node.to_s.gsub(/</, "&lt;") + end + end + + def process_attributes_for(node, options) + return unless node.attributes + node.attributes.keys.each do |attr_name| + value = node.attributes[attr_name].to_s + + if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value) + node.attributes.delete(attr_name) + else + node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value) + end + end + end + + def contains_bad_protocols?(attr_name, value) + uri_attributes.include?(attr_name) && + (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(%|&#37;)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first)) + end + end +end
\ No newline at end of file diff --git a/actionpack/lib/action_view/helpers/sanitize_helper.rb b/actionpack/lib/action_view/helpers/sanitize_helper.rb index e67abd9f67..47fbe3a27a 100644 --- a/actionpack/lib/action_view/helpers/sanitize_helper.rb +++ b/actionpack/lib/action_view/helpers/sanitize_helper.rb @@ -49,69 +49,12 @@ module ActionView # end # def sanitize(html, options = {}) - return html if html.blank? || !html.include?('<') - - attrs = options[:attributes] || sanitized_allowed_attributes - tags = options[:tags] || sanitized_allowed_tags - - returning [] do |new_text| - tokenizer = HTML::Tokenizer.new(html) - parent = [] - - while token = tokenizer.next - node = HTML::Node.parse(nil, 0, 0, token, false) - - new_text << case node - when HTML::Tag - if node.closing == :close - parent.shift - else - parent.unshift node.name - end - - node.attributes.keys.each do |attr_name| - value = node.attributes[attr_name].to_s - - if !attrs.include?(attr_name) || contains_bad_protocols?(attr_name, value) - node.attributes.delete(attr_name) - else - node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value) - end - end if node.attributes - - tags.include?(node.name) ? node : nil - else - sanitized_bad_tags.include?(parent.first) ? nil : node.to_s.gsub(/</, "<") - end - end - end.join + self.class.white_list_sanitizer.sanitize(html, options) end # Sanitizes a block of css code. 
Used by #sanitize when it comes across a style attribute def sanitize_css(style) - # disallow urls - style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') - - # gauntlet - if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ || - style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/ - return '' - end - - returning [] do |clean| - style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| - if sanitized_allowed_css_properties.include?(prop.downcase) - clean << prop + ': ' + val + ';' - elsif sanitized_shorthand_css_properties.include?(prop.split('-')[0].downcase) - unless val.split().any? do |keyword| - !sanitized_allowed_css_keywords.include?(keyword) && - keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ - end - clean << prop + ': ' + val + ';' - end - end - end - end.join(' ') + self.class.white_list_sanitizer.sanitize_css(style) end # Strips all HTML tags from the +html+, including comments. This uses the @@ -129,23 +72,7 @@ module ActionView # strip_tags("<div id='top-bar'>Welcome to my website!</div>") # # => Welcome to my website! def strip_tags(html) - return html if html.blank? || !html.index("<") - tokenizer = HTML::Tokenizer.new(html) - - text = returning [] do |text| - while token = tokenizer.next - node = HTML::Node.parse(nil, 0, 0, token, false) - # result is only the content of any Text nodes - text << node.to_s if node.class == HTML::Text - end - end - - # strip any comments, and if they have a newline at the end (ie. line with - # only a comment) strip that too - result = text.join.gsub(/<!--(.*?)-->[\n]?/m, "") - - # Recurse - handle all dirty nested tags - result == html ? result : strip_tags(result) + self.class.full_sanitizer.sanitize(html) end # Strips all link tags from +text+ leaving just the link text. 
@@ -160,80 +87,57 @@ module ActionView # strip_links('Blog: <a href="http://www.myblog.com/" class="nav" target=\"_blank\">Visit</a>.') # # => Blog: Visit def strip_links(html) - if !html.blank? && (html.index("<a") || html.index("<href")) && html.index(">") - tokenizer = HTML::Tokenizer.new(html) - result = returning [] do |result| - while token = tokenizer.next - node = HTML::Node.parse(nil, 0, 0, token, false) - result << node.to_s unless node.is_a?(HTML::Tag) && ["a", "href"].include?(node.name) - end - end.join - result == html ? result : strip_links(result) # Recurse - handle all dirty nested links - else - html - end + self.class.link_sanitizer.sanitize(html) end - # A regular expression of the valid characters used to separate protocols like - # the ':' in 'http://foo.com' - @@sanitized_protocol_separator = /:|(�*58)|(p)|(%|%)3A/ - mattr_accessor :sanitized_protocol_separator, :instance_writer => false - - # Specifies a Set of HTML attributes that can have URIs. - @@sanitized_uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) - mattr_reader :sanitized_uri_attributes - - # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed - # to just escaping harmless tags like <font> - @@sanitized_bad_tags = Set.new(%w(script)) - mattr_reader :sanitized_bad_tags - - # Specifies the default Set of tags that the #sanitize helper will allow unscathed. - @@sanitized_allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr - acronym a img blockquote del ins fieldset legend)) - mattr_reader :sanitized_allowed_tags - - # Specifies the default Set of html attributes that the #sanitize helper will leave - # in the allowed tag. 
- @@sanitized_allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) - mattr_reader :sanitized_allowed_attributes - - # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - @@sanitized_allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse - border-color border-left-color border-right-color border-top-color clear color cursor direction display - elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height - overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation - speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space - width)) - mattr_reader :sanitized_allowed_css_properties - - # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. - @@sanitized_allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center - collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal - nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) - mattr_reader :sanitized_allowed_css_keywords - - # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. - @@sanitized_shorthand_css_properties = Set.new(%w(background border margin padding)) - mattr_reader :sanitized_shorthand_css_properties - - # Specifies the default Set of protocols that the #sanitize helper will leave in - # protocol attributes. 
- @@sanitized_allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) - mattr_reader :sanitized_allowed_protocols - module ClassMethods #:nodoc: def self.extended(base) class << base + attr_writer :full_sanitizer, :link_sanitizer, :white_list_sanitizer + # we want these to be class methods on ActionView::Base, they'll get mattr_readers for these below. - [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags, + helper_def = [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags, :sanitized_allowed_attributes, :sanitized_allowed_css_properties, :sanitized_allowed_css_keywords, - :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].each do |prop| - delegate prop, :to => SanitizeHelper - end + :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].collect! do |prop| + prop = prop.to_s + "def #{prop}(#{:value if prop =~ /=$/}) white_list_sanitizer.#{prop.sub /sanitized_/, ''} #{:value if prop =~ /=$/} end" + end.join("\n") + eval helper_def end end + + # Gets the HTML::FullSanitizer instance used by strip_tags. Replace with + # any object that responds to #sanitize + # + # Rails::Initializer.run do |config| + # config.action_view.full_sanitizer = MySpecialSanitizer.new + # end + # + def full_sanitizer + @full_sanitizer ||= HTML::FullSanitizer.new + end + + # Gets the HTML::LinkSanitizer instance used by strip_links. Replace with + # any object that responds to #sanitize + # + # Rails::Initializer.run do |config| + # config.action_view.link_sanitizer = MySpecialSanitizer.new + # end + # + def link_sanitizer + @link_sanitizer ||= HTML::LinkSanitizer.new + end + + # Gets the HTML::WhiteListSanitizer instance used by sanitize and sanitize_css. 
+ # Replace with any object that responds to #sanitize + # + # Rails::Initializer.run do |config| + # config.action_view.white_list_sanitizer = MySpecialSanitizer.new + # end + # + def white_list_sanitizer + @white_list_sanitizer ||= HTML::WhiteListSanitizer.new + end # Adds valid HTML attributes that the #sanitize helper checks for URIs. # @@ -242,7 +146,7 @@ module ActionView # end # def sanitized_uri_attributes=(attributes) - Helpers::SanitizeHelper.sanitized_uri_attributes.merge(attributes) + HTML::WhiteListSanitizer.uri_attributes.merge(attributes) end # Adds to the Set of 'bad' tags for the #sanitize helper. @@ -252,7 +156,7 @@ module ActionView # end # def sanitized_bad_tags=(attributes) - Helpers::SanitizeHelper.sanitized_bad_tags.merge(attributes) + HTML::WhiteListSanitizer.bad_tags.merge(attributes) end # Adds to the Set of allowed tags for the #sanitize helper. # @@ -261,7 +165,7 @@ module ActionView # end # def sanitized_allowed_tags=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_tags.merge(attributes) + HTML::WhiteListSanitizer.allowed_tags.merge(attributes) end # Adds to the Set of allowed html attributes for the #sanitize helper. @@ -271,7 +175,7 @@ module ActionView # end # def sanitized_allowed_attributes=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_attributes.merge(attributes) + HTML::WhiteListSanitizer.allowed_attributes.merge(attributes) end # Adds to the Set of allowed css properties for the #sanitize and #sanitize_css heleprs. @@ -281,7 +185,7 @@ module ActionView # end # def sanitized_allowed_css_properties=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_css_properties.merge(attributes) + HTML::WhiteListSanitizer.allowed_css_properties.merge(attributes) end # Adds to the Set of allowed css keywords for the #sanitize and #sanitize_css helpers. 
@@ -291,7 +195,7 @@ module ActionView # end # def sanitized_allowed_css_keywords=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_css_keywords.merge(attributes) + HTML::WhiteListSanitizer.allowed_css_keywords.merge(attributes) end # Adds to the Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. @@ -301,7 +205,7 @@ module ActionView # end # def sanitized_shorthand_css_properties=(attributes) - Helpers::SanitizeHelper.sanitized_shorthand_css_properties.merge(attributes) + HTML::WhiteListSanitizer.shorthand_css_properties.merge(attributes) end # Adds to the Set of allowed protocols for the #sanitize helper. @@ -311,15 +215,9 @@ module ActionView # end # def sanitized_allowed_protocols=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_protocols.merge(attributes) + HTML::WhiteListSanitizer.allowed_protocols.merge(attributes) end end - - private - def contains_bad_protocols?(attr_name, value) - sanitized_uri_attributes.include?(attr_name) && - (value =~ /(^[^\/:]*):|(�*58)|(p)|(%|%)3A/ && !sanitized_allowed_protocols.include?(value.split(sanitized_protocol_separator).first)) - end end end end diff --git a/actionpack/test/controller/html-scanner/sanitizer_test.rb b/actionpack/test/controller/html-scanner/sanitizer_test.rb new file mode 100644 index 0000000000..9a7de9ad86 --- /dev/null +++ b/actionpack/test/controller/html-scanner/sanitizer_test.rb @@ -0,0 +1,244 @@ +require File.dirname(__FILE__) + '/../../abstract_unit' +require 'test/unit' + +class SanitizerTest < Test::Unit::TestCase + def setup + @sanitizer = nil # used by assert_sanitizer + end + + def test_strip_tags + sanitizer = HTML::FullSanitizer.new + assert_equal("<<<bad html", sanitizer.sanitize("<<<bad html")) + assert_equal("<<", sanitizer.sanitize("<<<bad html>")) + assert_equal("Dont touch me", sanitizer.sanitize("Dont touch me")) + assert_equal("This is a test.", sanitizer.sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")) + 
assert_equal("Weirdos", sanitizer.sanitize("Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos")) + assert_equal("This is a test.", sanitizer.sanitize("This is a test.")) + assert_equal( + %{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize( + %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n})) + assert_equal "This has a here.", sanitizer.sanitize("This has a <!-- comment --> here.") + [nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) } + end + + def test_strip_links + sanitizer = HTML::LinkSanitizer.new + assert_equal "Dont touch me", sanitizer.sanitize("Dont touch me") + assert_equal "on my mind\nall day long", sanitizer.sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>") + assert_equal "0wn3d", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>") + assert_equal "Magic", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic") + assert_equal "FrrFox", sanitizer.sanitize("<href onlclick='steal()'>FrrFox</a></href>") + assert_equal "My mind\nall <b>day</b> long", sanitizer.sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>") + assert_equal "all <b>day</b> long", sanitizer.sanitize("<<a>a href='hello'>all <b>day</b> long<</A>/a>") + + assert_equal "<a<a", sanitizer.sanitize("<a<a") + end + + def test_sanitize_form + assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", '' + end + + def test_sanitize_plaintext + raw = "<plaintext><span>foo</span></plaintext>" + assert_sanitized raw, "<span>foo</span>" + end + + def test_sanitize_script + assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cd e f" + end + + # fucked + def 
test_sanitize_js_handlers + raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>} + assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>} + end + + def test_sanitize_javascript_href + raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>} + assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>} + end + + def test_sanitize_image_src + raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>} + assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>} + end + + HTML::WhiteListSanitizer.allowed_tags.each do |tag_name| + define_method "test_should_allow_#{tag_name}_tag" do + assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end) + end + end + + def test_should_allow_anchors + assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>) + end + + # RFC 3986, sec 4.2 + def test_allow_colons_in_path_component + assert_sanitized("<a href=\"./this:that\">foo</a>") + end + + %w(src width height alt).each do |img_attr| + define_method "test_should_allow_image_#{img_attr}_attribute" do + assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />) + end + end + + def test_should_handle_non_html + assert_sanitized 'abc' + end + + def test_should_handle_blank_text + assert_sanitized nil + assert_sanitized '' + end + + def test_should_allow_custom_tags + text = "<u>foo</u>" + sanitizer = HTML::WhiteListSanitizer.new + assert_equal(text, sanitizer.sanitize(text, :tags => %w(u))) + end + + def test_should_allow_only_custom_tags + text = "<u>foo</u> with <i>bar</i>" + sanitizer = HTML::WhiteListSanitizer.new + assert_equal("<u>foo</u> with bar", 
sanitizer.sanitize(text, :tags => %w(u))) + end + + def test_should_allow_custom_tags_with_attributes + text = %(<fieldset foo="bar">foo</fieldset>) + sanitizer = HTML::WhiteListSanitizer.new + assert_equal(text, sanitizer.sanitize(text, :attributes => ['foo'])) + end + + [%w(img src), %w(a href)].each do |(tag, attr)| + define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do + assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>) + end + end + + def test_should_flag_bad_protocols + sanitizer = HTML::WhiteListSanitizer.new + %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto| + assert sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://bad") + end + end + + def test_should_accept_good_protocols + sanitizer = HTML::WhiteListSanitizer.new + HTML::WhiteListSanitizer.allowed_protocols.each do |proto| + assert !sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://good") + end + end + + def test_should_reject_hex_codes_in_protocol + assert_sanitized %(<a href="%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29">1</a>), "<a>1</a>" + assert @sanitizer.send(:contains_bad_protocols?, 'src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29") + end + + def test_should_block_script_tag + assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), "" + end + + [%(<IMG SRC="javascript:alert('XSS');">), + %(<IMG SRC=javascript:alert('XSS')>), + %(<IMG SRC=JaVaScRiPt:alert('XSS')>), + %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), + %(<IMG SRC=javascript:alert("XSS")>), + %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>), + %(<IMG SRC=javascript:alert('XSS')>), + %(<IMG SRC=javascript:alert('XSS')>), + %(<IMG SRC=javascript:alert('XSS')>), + %(<IMG SRC="jav\tascript:alert('XSS');">), + %(<IMG 
SRC="jav	ascript:alert('XSS');">), + %(<IMG SRC="jav
ascript:alert('XSS');">), + %(<IMG SRC="jav
ascript:alert('XSS');">), + %(<IMG SRC="  javascript:alert('XSS');">), + %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i| + define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do + assert_sanitized img_hack, "<img>" + end + end + + def test_should_sanitize_tag_broken_up_by_null + assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")" + end + + def test_should_sanitize_invalid_script_tag + assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), "" + end + + def test_should_sanitize_script_tag_with_multiple_open_brackets + assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "<" + assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(<a) + end + + def test_should_sanitize_unclosed_script + assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>" + end + + def test_should_sanitize_half_open_scripts + assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>" + end + + def test_should_not_fall_for_ridiculous_hack + img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>) + assert_sanitized img_hack, "<img>" + end + + # fucked + def test_should_sanitize_attributes + assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'><script>alert()</script>">blah</span>) + end + + def test_should_sanitize_illegal_style_properties + raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;) + expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;) + assert_equal expected, sanitize_css(raw) + end + + def test_should_sanitize_xul_style_attributes + raw = 
%(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')) + assert_equal '', sanitize_css(raw) + end + + def test_should_sanitize_invalid_tag_names + assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f") + end + + def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags + assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>") + end + + def test_should_sanitize_invalid_tag_names_in_single_tags + assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />") + end + + def test_should_sanitize_img_dynsrc_lowsrc + assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />") + end + + def test_should_sanitize_div_background_image_unicode_encoded + raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029) + assert_equal '', sanitize_css(raw) + end + + def test_should_sanitize_div_style_expression + raw = %(width: expression(alert('XSS'));) + assert_equal '', sanitize_css(raw) + end + + def test_should_sanitize_img_vbscript + assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />' + end + +protected + def assert_sanitized(input, expected = nil) + @sanitizer ||= HTML::WhiteListSanitizer.new + assert_equal expected || input, @sanitizer.sanitize(input) + end + + def sanitize_css(input) + (@sanitizer ||= HTML::WhiteListSanitizer.new).sanitize_css(input) + end +end
\ No newline at end of file diff --git a/actionpack/test/template/sanitize_helper_test.rb b/actionpack/test/template/sanitize_helper_test.rb index 9f039e287f..7f2d2d9cc0 100644 --- a/actionpack/test/template/sanitize_helper_test.rb +++ b/actionpack/test/template/sanitize_helper_test.rb @@ -1,6 +1,8 @@ require "#{File.dirname(__FILE__)}/../abstract_unit" require "#{File.dirname(__FILE__)}/../testing_sandbox" +# The exhaustive tests are in test/controller/html/sanitizer_test.rb. +# This tests the that the helpers hook up correctly to the sanitizer classes. class SanitizeHelperTest < Test::Unit::TestCase include ActionView::Helpers::SanitizeHelper include ActionView::Helpers::TagHelper @@ -21,199 +23,12 @@ class SanitizeHelperTest < Test::Unit::TestCase assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", '' end - def test_sanitize_plaintext - raw = "<plaintext><span>foo</span></plaintext>" - assert_sanitized raw, "<span>foo</span>" - end - - def test_sanitize_script - raw = "a b c<script language=\"Javascript\">blah blah blah</script>d e f" - assert_sanitized raw, "a b cd e f" - end - - def test_sanitize_js_handlers - raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>} - assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>} - end - - def test_sanitize_javascript_href - raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>} - assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>} - end - - def test_sanitize_image_src - raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>} - assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>} - end - - ActionView::Helpers::SanitizeHelper.sanitized_allowed_tags.each do |tag_name| - define_method "test_should_allow_#{tag_name}_tag" do - assert_sanitized 
"start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end) - end - end - - def test_should_allow_anchors - assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>) - end - - # RFC 3986, sec 4.2 - def test_allow_colons_in_path_component - assert_sanitized("<a href=\"./this:that\">foo</a>") - end - - %w(src width height alt).each do |img_attr| - define_method "test_should_allow_image_#{img_attr}_attribute" do - assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />) - end - end - - def test_should_handle_non_html - assert_sanitized 'abc' - end - - def test_should_handle_blank_text - assert_sanitized nil - assert_sanitized '' - end - - def test_should_allow_custom_tags - text = "<u>foo</u>" - assert_equal(text, sanitize(text, :tags => %w(u))) - end - - def test_should_allow_only_custom_tags - text = "<u>foo</u> with <i>bar</i>" - assert_equal("<u>foo</u> with bar", sanitize(text, :tags => %w(u))) - end - - def test_should_allow_custom_tags_with_attributes - text = %(<fieldset foo="bar">foo</fieldset>) - assert_equal(text, sanitize(text, :attributes => ['foo'])) - end - - [%w(img src), %w(a href)].each do |(tag, attr)| - define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do - assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>) - end - end - - def test_should_flag_bad_protocols - %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto| - assert contains_bad_protocols?('src', "#{proto}://bad") - end - end - - def test_should_accept_good_protocols - sanitized_allowed_protocols.each do |proto| - assert !contains_bad_protocols?('src', "#{proto}://good") - end - end - - def 
test_should_reject_hex_codes_in_protocol - assert contains_bad_protocols?('src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29") - assert_sanitized %(<a href="%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29">1</a>), "<a>1</a>" - end - - def test_should_block_script_tag - assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), "" - end - - [%(<IMG SRC="javascript:alert('XSS');">), - %(<IMG SRC=javascript:alert('XSS')>), - %(<IMG SRC=JaVaScRiPt:alert('XSS')>), - %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), - %(<IMG SRC=javascript:alert("XSS")>), - %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>), - %(<IMG SRC=javascript:alert('XSS')>), - %(<IMG SRC=javascript:alert('XSS')>), - %(<IMG SRC=javascript:alert('XSS')>), - %(<IMG SRC="jav\tascript:alert('XSS');">), - %(<IMG SRC="jav	ascript:alert('XSS');">), - %(<IMG SRC="jav
ascript:alert('XSS');">), - %(<IMG SRC="jav
ascript:alert('XSS');">), - %(<IMG SRC="  javascript:alert('XSS');">), - %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i| - define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do - assert_sanitized img_hack, "<img>" - end - end - - def test_should_sanitize_tag_broken_up_by_null - assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")" - end - - def test_should_sanitize_invalid_script_tag - assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), "" - end - - def test_should_sanitize_script_tag_with_multiple_open_brackets - assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "<" - assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(<a) - end - - def test_should_sanitize_unclosed_script - assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>" - end - - def test_should_sanitize_half_open_scripts - assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>" - end - - def test_should_not_fall_for_ridiculous_hack - img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>) - assert_sanitized img_hack, "<img>" - end - - def test_should_sanitize_attributes - assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'><script>alert()</script>">blah</span>) - end - def test_should_sanitize_illegal_style_properties raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;) expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;) assert_equal expected, sanitize_css(raw) end - def test_should_sanitize_xul_style_attributes - raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')) - 
assert_equal '', sanitize_css(raw) - end - - def test_should_sanitize_invalid_tag_names - assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f") - end - - def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags - assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>") - end - - def test_should_sanitize_invalid_tag_names_in_single_tags - assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />") - end - - def test_should_sanitize_img_dynsrc_lowsrc - assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />") - end - - def test_should_sanitize_div_background_image_unicode_encoded - raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029) - assert_equal '', sanitize_css(raw) - end - - def test_should_sanitize_div_style_expression - raw = %(width: expression(alert('XSS'));) - assert_equal '', sanitize_css(raw) - end - - def test_should_sanitize_style_attribute - raw = %(<div style="display:block; background:url(http://rubyonrails.com); background-image: url(rubyonrails)">foo</div>) - assert_equal %(<div style="display: block; background: ; background-image: ;">foo</div>), sanitize(raw, :attributes => 'style') - end - - def test_should_sanitize_img_vbscript - assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />' - end - def test_strip_tags assert_equal("<<<bad html", strip_tags("<<<bad html")) assert_equal("<<", strip_tags("<<<bad html>")) |