aboutsummaryrefslogtreecommitdiffstats
path: root/actionpack/lib/action_view/helpers
diff options
context:
space:
mode:
Diffstat (limited to 'actionpack/lib/action_view/helpers')
-rw-r--r--actionpack/lib/action_view/helpers/text_helper.rb55
1 files changed, 55 insertions, 0 deletions
diff --git a/actionpack/lib/action_view/helpers/text_helper.rb b/actionpack/lib/action_view/helpers/text_helper.rb
index 6b89bec9f2..2cc4b68ec0 100644
--- a/actionpack/lib/action_view/helpers/text_helper.rb
+++ b/actionpack/lib/action_view/helpers/text_helper.rb
@@ -128,6 +128,61 @@ module ActionView
def strip_links(text)
text.gsub(/<a.*>(.*)<\/a>/m, '\1')
end
+
+ # Try to require the html-scanner library
+ begin
+ require 'html/tokenizer'
+ require 'html/node'
+ rescue LoadError
+ # if there isn't a copy installed, use the vendor version in
+ # action controller
+ $:.unshift File.join(File.dirname(__FILE__), "..", "..",
+ "action_controller", "vendor", "html-scanner")
+ require 'html/tokenizer'
+ require 'html/node'
+ end
+
+ VERBOTEN_TAGS = %w(form script) unless defined?(VERBOTEN_TAGS)
+ VERBOTEN_ATTRS = /^on/i unless defined?(VERBOTEN_ATTRS)
+
+ # Sanitizes the given HTML by making form and script tags into regular
+ # text, and removing all "onxxx" attributes (so that arbitrary Javascript
+ # cannot be executed). Also removes href attributes that start with
+ # "javascript:".
+ #
+ # Returns the sanitized text.
+ def sanitize(html)
+ # only do this if absolutely necessary
+ if html.index("<")
+ tokenizer = HTML::Tokenizer.new(html)
+ new_text = ""
+
+ while token = tokenizer.next
+ node = HTML::Node.parse(nil, 0, 0, token, false)
+ new_text << case node
+ when HTML::Tag
+ if VERBOTEN_TAGS.include?(node.name)
+ node.to_s.gsub(/</, "&lt;")
+ else
+ if node.closing != :close
+ node.attributes.delete_if { |attr,v| attr =~ VERBOTEN_ATTRS }
+ if node.attributes["href"] =~ /^javascript:/i
+ node.attributes.delete "href"
+ end
+ end
+ node.to_s
+ end
+ else
+ node.to_s.gsub(/</, "&lt;")
+ end
+ end
+
+ html = new_text
+ end
+
+ html
+ end
+
private
# Returns a version of the text that's safe to use in a regular expression without triggering engine features.