From 2d02199e1581db8dc84361803950b1697f493fc0 Mon Sep 17 00:00:00 2001
From: Rick Olson <technoweenie@gmail.com>
Date: Sun, 23 Sep 2007 00:11:08 +0000
Subject: Secure #sanitize, #strip_tags, and #strip_links helpers against xss
 attacks.  Closes #8877. [Rick, lifofifo, Jacques Distler]

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@7589 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
---
 actionpack/CHANGELOG                              |   5 +
 actionpack/lib/action_view/base.rb                | 129 ++++++++++++++
 actionpack/lib/action_view/helpers/text_helper.rb | 136 ++++++++++----
 actionpack/test/template/text_helper_test.rb      | 206 ++++++++++++++++++++--
 4 files changed, 423 insertions(+), 53 deletions(-)

diff --git a/actionpack/CHANGELOG b/actionpack/CHANGELOG
index df25ec800f..b684148f91 100644
--- a/actionpack/CHANGELOG
+++ b/actionpack/CHANGELOG
@@ -1,5 +1,10 @@
 *SVN*
 
+* Secure #sanitize, #strip_tags, and #strip_links helpers against xss attacks.  Closes #8877. [Rick, lifofifo, Jacques Distler]
+
+  This merges and renames the popular white_list helper (along with some css sanitizing from Jacques Distler's version of the same plugin).
+  Also applied updated versions of #strip_tags and #strip_links from #8877.
+
 * Remove use of & logic operator. Closes #8114. [watson]
 
 * Fixed JavaScriptHelper#escape_javascript to also escape closing tags #8023 [rubyruy]
diff --git a/actionpack/lib/action_view/base.rb b/actionpack/lib/action_view/base.rb
index 14c42ce855..8e778f6830 100644
--- a/actionpack/lib/action_view/base.rb
+++ b/actionpack/lib/action_view/base.rb
@@ -198,6 +198,135 @@ module ActionView #:nodoc:
     
     @@erb_variable = '_erbout'
     cattr_accessor :erb_variable
+    
+    # Matches the protocol separator of a URI -- the ':' in 'http://foo.com' -- literally or encoded
+    # as a decimal/hex character reference or percent-escape; '&#x70' is kept for backward compatibility.
+    @@sanitized_protocol_separator = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
+    cattr_accessor :sanitized_protocol_separator
+
+    # Specifies a Set of HTML attributes that can have URIs.
+    @@sanitized_uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
+    cattr_reader :sanitized_uri_attributes
+
+    # Adds valid HTML attributes that the #sanitize helper checks for URIs.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_uri_attributes = 'lowsrc', 'target'
+    #   end
+    #
+    def self.sanitized_uri_attributes=(attributes)
+      @@sanitized_uri_attributes.merge(attributes)
+    end
+
+    # Specifies a Set of 'bad' tags that the #sanitize helper will remove along with their contents,
+    # as opposed to harmless unlisted tags like &lt;font&gt;, where only the tag itself is stripped.
+    @@sanitized_bad_tags = Set.new('script')
+    cattr_reader :sanitized_bad_tags
+    
+    # Adds to the Set of 'bad' tags for the #sanitize helper.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_bad_tags = 'embed', 'object'
+    #   end
+    #
+    def self.sanitized_bad_tags=(attributes)
+      @@sanitized_bad_tags.merge(attributes)
+    end
+    
+    # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
+    @@sanitized_allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub 
+      sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr 
+      acronym a img blockquote del ins fieldset legend))
+    cattr_reader :sanitized_allowed_tags
+
+    # Adds to the Set of allowed tags for the #sanitize helper.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
+    #   end
+    #
+    def self.sanitized_allowed_tags=(attributes)
+      @@sanitized_allowed_tags.merge(attributes)
+    end
+
+    # Specifies the default Set of html attributes that the #sanitize helper will leave 
+    # in the allowed tag.
+    @@sanitized_allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
+    cattr_reader :sanitized_allowed_attributes
+
+    # Adds to the Set of allowed html attributes for the #sanitize helper.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_allowed_attributes = 'onclick', 'longdesc'
+    #   end
+    #
+    def self.sanitized_allowed_attributes=(attributes)
+      @@sanitized_allowed_attributes.merge(attributes)
+    end
+
+    # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
+    @@sanitized_allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse 
+      border-color border-left-color border-right-color border-top-color clear color cursor direction display 
+      elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
+      overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
+      speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
+      width))
+    cattr_reader :sanitized_allowed_css_properties
+
+    # Adds to the Set of allowed css properties for the #sanitize and #sanitize_css helpers.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_allowed_css_properties = 'expression'
+    #   end
+    #
+    def self.sanitized_allowed_css_properties=(attributes)
+      @@sanitized_allowed_css_properties.merge(attributes)
+    end
+    
+    # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
+    @@sanitized_allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
+      collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
+      nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
+    cattr_reader :sanitized_allowed_css_keywords
+
+    # Adds to the Set of allowed css keywords for the #sanitize and #sanitize_css helpers.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_allowed_css_keywords = 'expression'
+    #   end
+    #
+    def self.sanitized_allowed_css_keywords=(attributes)
+      @@sanitized_allowed_css_keywords.merge(attributes)
+    end
+    
+    # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+    @@sanitized_shorthand_css_properties = Set.new(%w(background border margin padding))
+    cattr_reader :sanitized_shorthand_css_properties
+
+    # Adds to the Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_shorthand_css_properties = 'expression'
+    #   end
+    #
+    def self.sanitized_shorthand_css_properties=(attributes)
+      @@sanitized_shorthand_css_properties.merge(attributes)
+    end
+
+    # Specifies the default Set of protocols that the #sanitize helper will leave in
+    # protocol attributes.
+    @@sanitized_allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs))
+    cattr_reader :sanitized_allowed_protocols
+
+    # Adds to the Set of allowed protocols for the #sanitize helper.
+    #
+    #   Rails::Initializer.run do |config|
+    #     config.action_view.sanitized_allowed_protocols = 'ssh', 'feed'
+    #   end
+    #
+    def self.sanitized_allowed_protocols=(attributes)
+      @@sanitized_allowed_protocols.merge(attributes)
+    end
 
     @@template_handlers = HashWithIndifferentAccess.new
  
diff --git a/actionpack/lib/action_view/helpers/text_helper.rb b/actionpack/lib/action_view/helpers/text_helper.rb
index e7a6303154..af6f6e4bb8 100644
--- a/actionpack/lib/action_view/helpers/text_helper.rb
+++ b/actionpack/lib/action_view/helpers/text_helper.rb
@@ -324,63 +324,118 @@ module ActionView
       #
       #   strip_links('Blog: <a href="http://www.myblog.com/" class="nav" target=\"_blank\">Visit</a>.')
       #   # => Blog: Visit
-      def strip_links(text)
-        text.gsub(/<a\b.*?>(.*?)<\/a>/mi, '\1')
+      def strip_links(html)
+        # Firefox treats '<href="http://whatever.com" onClick="alert()">something' as a link!
+        if html.index("<a") || html.index("<href")   
+          tokenizer = HTML::Tokenizer.new(html) 
+          result = ''
+          while token = tokenizer.next 
+            node = HTML::Node.parse(nil, 0, 0, token, false) 
+            result << node.to_s unless node.is_a?(HTML::Tag) && ["a", "href"].include?(node.name) 
+          end 
+          strip_links(result) # Recurse - handle all dirty nested links
+        else
+          html
+        end
       end
 
-      VERBOTEN_TAGS = %w(form script plaintext) unless defined?(VERBOTEN_TAGS)
-      VERBOTEN_ATTRS = /^on/i unless defined?(VERBOTEN_ATTRS)
-
-      # Sanitizes the +html+ by converting <form> and <script> tags into regular
-      # text, and removing all "on*" (e.g., onClick) attributes so that arbitrary Javascript
-      # cannot be executed. It also removes <tt>href</tt> and <tt>src</tt> attributes that start with
-      # "javascript:". You can modify what gets sanitized by defining VERBOTEN_TAGS
-      # and VERBOTEN_ATTRS before this Module is loaded.
+      # This #sanitize helper will strip all tags and attributes that aren't specifically allowed, and html encode stray '<' characters.
+      # It also strips href/src attributes with invalid protocols, especially javascript:.  It does its best to counter any
+      # tricks that hackers may use, like throwing in unicode/ascii/hex values to get past the javascript: filters.  Check out
+      # the extensive test suite.
       #
-      # ==== Examples
-      #   sanitize('<script> do_nasty_stuff() </script>')
-      #   # => &lt;script> do_nasty_stuff() &lt;/script>
+      #   <%= sanitize @article.body %>
+      # 
+      # You can add or remove tags/attributes if you want to customize it a bit.  See ActionView::Base for full docs on the
+      # available options.  You can add tags/attributes for single uses of #sanitize by passing either the :attributes or :tags options:
       #
-      #   sanitize('<a href="javascript: sucker();">Click here for $100</a>')
-      #   # => <a>Click here for $100</a>
+      # Normal Use
       #
-      #   sanitize('<a href="#" onClick="kill_all_humans();">Click here!!!</a>')
-      #   # => <a href="#">Click here!!!</a>
+      #   <%= sanitize @article.body %>
       #
-      #   sanitize('<img src="javascript:suckers_run_this();" />')
-      #   # => <img />
-      def sanitize(html)
-        # only do this if absolutely necessary
-        if html.index("<")
+      # Custom Use
+      #
+      #   <%= sanitize @article.body, :tags => %w(table tr td), :attributes => %w(id class style) %>
+      # 
+      # Add table tags
+      #   
+      #   Rails::Initializer.run do |config|
+      #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
+      #   end
+      # 
+      # Remove tags
+      #   
+      #   Rails::Initializer.run do |config|
+      #     config.after_initialize do
+      #       ActionView::Base.sanitized_allowed_tags.delete 'div'
+      #     end
+      #   end
+      # 
+      # Change allowed attributes
+      # 
+      #   Rails::Initializer.run do |config|
+      #     config.action_view.sanitized_allowed_attributes = 'id', 'class', 'style'
+      #   end
+      # 
+      def sanitize(html, options = {})
+        return html if html.blank? || !html.include?('<')
+        attrs = options.key?(:attributes) ? Set.new(options[:attributes]).merge(sanitized_allowed_attributes) : sanitized_allowed_attributes
+        tags  = options.key?(:tags)       ? Set.new(options[:tags]      ).merge(sanitized_allowed_tags)       : sanitized_allowed_tags
+        returning [] do |new_text|
           tokenizer = HTML::Tokenizer.new(html)
-          new_text = ""
-
+          parent    = [] 
           while token = tokenizer.next
             node = HTML::Node.parse(nil, 0, 0, token, false)
             new_text << case node
               when HTML::Tag
-                if VERBOTEN_TAGS.include?(node.name)
-                  node.to_s.gsub(/</, "&lt;")
+                if node.closing == :close
+                  parent.shift
                 else
-                  if node.closing != :close
-                    node.attributes.delete_if { |attr,v| attr =~ VERBOTEN_ATTRS }
-                    %w(href src).each do |attr|
-                      node.attributes.delete attr if node.attributes[attr] =~ /^javascript:/i
-                    end
-                  end
-                  node.to_s
+                  parent.unshift node.name
                 end
+                node.attributes.keys.each do |attr_name|
+                  value = node.attributes[attr_name].to_s
+                  if !attrs.include?(attr_name) || contains_bad_protocols?(attr_name, value)
+                    node.attributes.delete(attr_name)
+                  else
+                    node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value)
+                  end
+                end if node.attributes
+                tags.include?(node.name) ? node : nil
               else
-                node.to_s.gsub(/</, "&lt;")
+                sanitized_bad_tags.include?(parent.first) ? nil : node.to_s.gsub(/</, "&lt;")
             end
           end
+        end.join
+      end
 
-          html = new_text
+      # Sanitizes a block of css code.  Used by #sanitize when it comes across a style attribute
+      def sanitize_css(style)
+        # disallow urls
+        style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+
+        # gauntlet
+        if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ ||
+            style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
+          return ''
         end
 
-        html
+        returning [] do |clean|
+          style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
+            if sanitized_allowed_css_properties.include?(prop.downcase)
+              clean <<  prop + ': ' + val + ';'
+            elsif sanitized_shorthand_css_properties.include?(prop.split('-')[0].downcase) 
+              unless val.split().any? do |keyword|
+                !sanitized_allowed_css_keywords.include?(keyword) && 
+                  keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
+              end
+                clean << prop + ': ' + val + ';'
+              end
+            end
+          end
+        end.join(' ')
       end
-      
+
       # Strips all HTML tags from the +html+, including comments.  This uses the 
       # html-scanner tokenizer and so its HTML parsing ability is limited by 
       # that of html-scanner.
@@ -407,7 +462,7 @@ module ActionView
           end
           # strip any comments, and if they have a newline at the end (ie. line with
           # only a comment) strip that too
-          text.gsub(/<!--(.*?)-->[\n]?/m, "") 
+          strip_tags(text.gsub(/<!--(.*?)-->[\n]?/m, "")) # Recurse - handle all dirty nested tags
         else
           html # already plain text
         end 
@@ -574,6 +629,11 @@ module ActionView
             end
           end
         end
+
+        # Truthy when +attr_name+ is a URI attribute and +value+ names a protocol (raw, entity- or %-encoded colon) outside sanitized_allowed_protocols.
+        def contains_bad_protocols?(attr_name, value)
+          sanitized_uri_attributes.include?(attr_name) && (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i && !sanitized_allowed_protocols.include?(value.split(sanitized_protocol_separator).first))
+        end
     end
   end
 end
diff --git a/actionpack/test/template/text_helper_test.rb b/actionpack/test/template/text_helper_test.rb
index 822b88adee..80b9c773b3 100644
--- a/actionpack/test/template/text_helper_test.rb
+++ b/actionpack/test/template/text_helper_test.rb
@@ -5,7 +5,7 @@ class TextHelperTest < Test::Unit::TestCase
   include ActionView::Helpers::TextHelper
   include ActionView::Helpers::TagHelper
   include TestingSandbox
-  
+
   def setup
     # This simulates the fact that instance variables are reset every time
     # a view is rendered.  The cycle helper depends on this behavior.
@@ -47,7 +47,13 @@ class TextHelperTest < Test::Unit::TestCase
   end
   
   def test_strip_links
+    assert_equal "Dont touch me", strip_links("Dont touch me")
     assert_equal "on my mind\nall day long", strip_links("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
+    assert_equal "0wn3d", strip_links("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>") 
+    assert_equal "Magic", strip_links("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic") 
+    assert_equal "FrrFox", strip_links("<href onlclick='steal()'>FrrFox</a></href>") 
+    assert_equal "My mind\nall <b>day</b> long", strip_links("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
+    assert_equal "all <b>day</b> long", strip_links("<<a>a href='hello'>all <b>day</b> long<</A>/a>")
   end
 
   def test_highlighter
@@ -255,41 +261,198 @@ class TextHelperTest < Test::Unit::TestCase
   end
 
   def test_sanitize_form
-    raw = "<form action=\"/foo/bar\" method=\"post\"><input></form>"
-    result = sanitize(raw)
-    assert_equal %(&lt;form action="/foo/bar" method="post"><input>&lt;/form>), result
+    assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
   end
 
   def test_sanitize_plaintext
     raw = "<plaintext><span>foo</span></plaintext>"
-    result = sanitize(raw)
-    assert_equal "&lt;plaintext><span>foo</span>&lt;/plaintext>", result
+    assert_sanitized raw, "<span>foo</span>"
   end
 
   def test_sanitize_script
-    raw = "<script language=\"Javascript\">blah blah blah</script>"
-    result = sanitize(raw)
-    assert_equal %{&lt;script language="Javascript">blah blah blah&lt;/script>}, result
+    raw = "a b c<script language=\"Javascript\">blah blah blah</script>d e f"
+    assert_sanitized raw, "a b cd e f"
   end
 
   def test_sanitize_js_handlers
     raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
-    result = sanitize(raw)
-    assert_equal %{onthis="do that" <a name="foo" href="#">hello</a>}, result
+    assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>}
   end
 
   def test_sanitize_javascript_href
     raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
-    result = sanitize(raw)
-    assert_equal %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}, result
+    assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
   end
   
   def test_sanitize_image_src
     raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
-    result = sanitize(raw)
-    assert_equal %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}, result
+    assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
+  end
+
+  ActionView::Base.sanitized_allowed_tags.each do |tag_name|
+    define_method "test_should_allow_#{tag_name}_tag" do
+      assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
+    end
+  end
+
+  def test_should_allow_anchors
+    assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>)
+  end
+
+  # RFC 3986, sec 4.2
+  def test_allow_colons_in_path_component
+    assert_sanitized("<a href=\"./this:that\">foo</a>")
+  end
+
+  %w(src width height alt).each do |img_attr|
+    define_method "test_should_allow_image_#{img_attr}_attribute" do
+      assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
+    end
+  end
+
+  def test_should_handle_non_html
+    assert_sanitized 'abc'
+  end
+
+  def test_should_handle_blank_text
+    assert_sanitized nil
+    assert_sanitized ''
+  end
+
+  def test_should_allow_custom_tags
+    text = "<u>foo</u>"
+    assert_equal(text, sanitize(text, :tags => %w(u)))
+  end
+
+  def test_should_allow_custom_tags_with_attributes
+    text = %(<fieldset foo="bar">foo</fieldset>)
+    assert_equal(text, sanitize(text, :attributes => ['foo']))
+  end
+
+  [%w(img src), %w(a href)].each do |(tag, attr)|
+    define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
+      assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
+    end
+  end
+
+  def test_should_flag_bad_protocols
+    %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto|
+      assert contains_bad_protocols?('src', "#{proto}://bad")
+    end
+  end
+
+  def test_should_accept_good_protocols
+    sanitized_allowed_protocols.each do |proto|
+      assert !contains_bad_protocols?('src', "#{proto}://good")
+    end
+  end
+
+  def test_should_reject_hex_codes_in_protocol
+    assert contains_bad_protocols?('src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29")
+    assert_sanitized %(<a href="&#37;6A&#37;61&#37;76&#37;61&#37;73&#37;63&#37;72&#37;69&#37;70&#37;74&#37;3A&#37;61&#37;6C&#37;65&#37;72&#37;74&#37;28&#37;22&#37;58&#37;53&#37;53&#37;22&#37;29">1</a>), "<a>1</a>"
+  end
+
+  def test_should_block_script_tag
+    assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
+  end
+
+  [%(<IMG SRC="javascript:alert('XSS');">), 
+   %(<IMG SRC=javascript:alert('XSS')>), 
+   %(<IMG SRC=JaVaScRiPt:alert('XSS')>), 
+   %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">),
+   %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
+   %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
+   %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
+   %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
+   %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
+   %(<IMG SRC="jav\tascript:alert('XSS');">),
+   %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
+   %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
+   %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
+   %(<IMG SRC=" &#14;  javascript:alert('XSS');">),
+   %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i|
+    define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do
+      assert_sanitized img_hack, "<img>"
+    end
+  end
+  
+  def test_should_sanitize_tag_broken_up_by_null
+    assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")"
+  end
+  
+  def test_should_sanitize_invalid_script_tag
+    assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
   end
   
+  def test_should_sanitize_script_tag_with_multiple_open_brackets
+    assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;"
+    assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(&lt;a)
+  end
+  
+  def test_should_sanitize_unclosed_script
+    assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>"
+  end
+  
+  def test_should_sanitize_half_open_scripts
+    assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
+  end
+  
+  def test_should_not_fall_for_ridiculous_hack
+    img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
+    assert_sanitized img_hack, "<img>"
+  end
+
+  def test_should_sanitize_attributes
+    assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>)
+  end
+
+  def test_should_sanitize_illegal_style_properties
+    raw      = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
+    expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;)
+    assert_equal expected, sanitize_css(raw)
+  end
+
+  def test_should_sanitize_xul_style_attributes
+    raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
+    assert_equal '', sanitize_css(raw)
+  end
+  
+  def test_should_sanitize_invalid_tag_names
+    assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
+  end
+  
+  def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
+    assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
+  end
+  
+  def test_should_sanitize_invalid_tag_names_in_single_tags
+    assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
+  end
+
+  def test_should_sanitize_img_dynsrc_lowsrc
+    assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
+  end
+
+  def test_should_sanitize_div_background_image_unicode_encoded
+    raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029)
+    assert_equal '', sanitize_css(raw)
+  end
+
+  def test_should_sanitize_div_style_expression
+    raw = %(width: expression(alert('XSS'));)
+    assert_equal '', sanitize_css(raw)
+  end
+  
+  def test_should_sanitize_style_attribute
+    raw = %(<div style="display:block; background:url(http://rubyonrails.com); background-image: url(rubyonrails)">foo</div>)
+    assert_equal %(<div style="display: block; background: ; background-image: ;">foo</div>), sanitize(raw, :attributes => 'style')
+  end
+
+  def test_should_sanitize_img_vbscript
+     assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
+  end
+
+
   def test_cycle_class
     value = Cycle.new("one", 2, "3")
     assert_equal("one", value.to_s)
@@ -374,7 +537,9 @@ class TextHelperTest < Test::Unit::TestCase
   end
 
   def test_strip_tags
+    assert_equal("Dont touch me", strip_tags("Dont touch me"))
     assert_equal("This is a test.", strip_tags("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>"))
+    assert_equal("Weirdos", strip_tags("Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"))
     assert_equal("This is a test.", strip_tags("This is a test."))
     assert_equal(
     %{This is a test.\n\n\nIt no longer contains any HTML.\n}, strip_tags(
@@ -382,4 +547,15 @@ class TextHelperTest < Test::Unit::TestCase
     assert_equal "This has a  here.", strip_tags("This has a <!-- comment --> here.")
     [nil, '', '   '].each { |blank| assert_equal blank, strip_tags(blank) }
   end
+
+  def assert_sanitized(text, expected = nil)
+    assert_equal((expected || text), sanitize(text))
+  end
+
+  # Delegate the sanitizer configuration readers to ActionView::Base so the helpers under test can resolve them
+  [:sanitized_protocol_separator, :sanitized_bad_tags, :sanitized_allowed_tags, :sanitized_allowed_attributes, :sanitized_allowed_protocols, :sanitized_allowed_css_properties, :sanitized_allowed_css_keywords, :sanitized_shorthand_css_properties, :sanitized_uri_attributes].each do |attr|
+    define_method attr do
+      ActionView::Base.send(attr)
+    end
+  end
 end
-- 
cgit v1.2.3