Added TextHelper#sanitize, which will remove any JavaScript handlers, script blocks, and forms from HTML input. This allows HTML to be used on public sites while still being free of XSS issues. #1277 [Jamis Buck]

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@1298 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
author: David Heinemeier Hansson <david@loudthinking.com> 2005-05-09 11:24:18 +0000
committer: David Heinemeier Hansson <david@loudthinking.com> 2005-05-09 11:24:18 +0000
commit: 45780be2a7d6ddb5851e04279728c817c941c31c (patch)
tree: 142055ea22c6e5caca67108b59b4087f3053e3ff /actionpack/lib/action_view/helpers
parent: b167248b21a8da63be871ec6815d117a8efa25f3 (diff)
download: rails-45780be2a7d6ddb5851e04279728c817c941c31c.tar.gz
rails-45780be2a7d6ddb5851e04279728c817c941c31c.tar.bz2
rails-45780be2a7d6ddb5851e04279728c817c941c31c.zip
1 files changed, 55 insertions, 0 deletions
diff --git a/actionpack/lib/action_view/helpers/text_helper.rb b/actionpack/lib/action_view/helpers/text_helper.rb
index 6b89bec9f2..2cc4b68ec0 100644
--- a/actionpack/lib/action_view/helpers/text_helper.rb
+++ b/actionpack/lib/action_view/helpers/text_helper.rb
@@ -128,6 +128,61 @@ module ActionView
       def strip_links(text)  # replaces each <a ...>...</a> match in text with its captured inner text
         text.gsub(/<a.*>(.*)<\/a>/m, '\1')  # NOTE(review): both .* are greedy and /m lets . cross newlines, so one match can span several links
       end
+
+      # Load the html-scanner library files (html/tokenizer and html/node)
+      begin
+        require 'html/tokenizer'
+        require 'html/node'
+      rescue LoadError
+        # No copy was found on the load path: fall back to the vendored
+        # copy under action_controller/vendor/html-scanner
+        $:.unshift File.join(File.dirname(__FILE__), "..", "..",  # prepends the vendor dir to the global load path
+                      "action_controller", "vendor", "html-scanner")
+        require 'html/tokenizer'
+        require 'html/node'
+      end
+
+      VERBOTEN_TAGS = %w(form script) unless defined?(VERBOTEN_TAGS)  # tag names that sanitize escapes into plain text
+      VERBOTEN_ATTRS = /^on/i unless defined?(VERBOTEN_ATTRS)  # attribute names that sanitize deletes (anything starting with "on")
+
+      # Sanitizes the given HTML by making form and script tags into regular
+      # text, and removing all "onxxx" attributes (so that arbitrary Javascript
+      # cannot be executed). Also removes href attributes that start with
+      # "javascript:".
+      #
+      # Returns the sanitized text.
+      def sanitize(html)  # html: string of markup; returns the sanitized string
+        # Fast path: input containing no "<" is returned unchanged, without tokenizing
+        if html.index("<")
+          tokenizer = HTML::Tokenizer.new(html)
+          new_text = ""  # accumulates the sanitized output
+
+          while token = tokenizer.next  # loop ends when next returns nil/false
+            node = HTML::Node.parse(nil, 0, 0, token, false)  # NOTE(review): args presumably parent, line, pos, token, strict -- confirm
+            new_text << case node  # append the sanitized rendering of this node
+              when HTML::Tag
+                if VERBOTEN_TAGS.include?(node.name)
+                  node.to_s.gsub(/</, "&lt;")  # forbidden tag: escape "<" so it shows as text
+                else
+                  if node.closing != :close  # attribute filtering is skipped for closing tags
+                    node.attributes.delete_if { |attr,v| attr =~ VERBOTEN_ATTRS }  # drop on* handlers
+                    if node.attributes["href"] =~ /^javascript:/i  # NOTE(review): only href is checked; leading whitespace in the value is not caught
+                      node.attributes.delete "href"
+                    end
+                  end
+                  node.to_s  # allowed tag, re-serialized after filtering
+                end
+              else
+                node.to_s.gsub(/</, "&lt;")  # any non-tag node: escape every "<"
+            end
+          end
+
+          html = new_text  # swap in the rebuilt string
+        end
+
+        html  # either the untouched input or the sanitized copy
+      end
+
       
       private
         # Returns a version of the text that's safe to use in a regular expression without triggering engine features.
author	David Heinemeier Hansson <david@loudthinking.com>	2005-05-09 11:24:18 +0000
committer	David Heinemeier Hansson <david@loudthinking.com>	2005-05-09 11:24:18 +0000
commit	45780be2a7d6ddb5851e04279728c817c941c31c (patch)
tree	142055ea22c6e5caca67108b59b4087f3053e3ff /actionpack/lib/action_view/helpers
parent	b167248b21a8da63be871ec6815d117a8efa25f3 (diff)
download	rails-45780be2a7d6ddb5851e04279728c817c941c31c.tar.gz rails-45780be2a7d6ddb5851e04279728c817c941c31c.tar.bz2 rails-45780be2a7d6ddb5851e04279728c817c941c31c.zip