From 1af084ecda66a8e1b4eb3a51a07ebca85bf2e419 Mon Sep 17 00:00:00 2001 From: Rick Olson Date: Mon, 26 Nov 2007 03:45:54 +0000 Subject: Refactor sanitizer helpers into HTML classes and make it easy to swap them out with custom implementations. Closes #10129. [rick] git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@8213 5ecf4fe2-1ee6-0310-87b1-e25e094e27de --- .../lib/action_view/helpers/sanitize_helper.rb | 208 ++++++--------------- 1 file changed, 53 insertions(+), 155 deletions(-) (limited to 'actionpack/lib/action_view') diff --git a/actionpack/lib/action_view/helpers/sanitize_helper.rb b/actionpack/lib/action_view/helpers/sanitize_helper.rb index e67abd9f67..47fbe3a27a 100644 --- a/actionpack/lib/action_view/helpers/sanitize_helper.rb +++ b/actionpack/lib/action_view/helpers/sanitize_helper.rb @@ -49,69 +49,12 @@ module ActionView # end # def sanitize(html, options = {}) - return html if html.blank? || !html.include?('<') - - attrs = options[:attributes] || sanitized_allowed_attributes - tags = options[:tags] || sanitized_allowed_tags - - returning [] do |new_text| - tokenizer = HTML::Tokenizer.new(html) - parent = [] - - while token = tokenizer.next - node = HTML::Node.parse(nil, 0, 0, token, false) - - new_text << case node - when HTML::Tag - if node.closing == :close - parent.shift - else - parent.unshift node.name - end - - node.attributes.keys.each do |attr_name| - value = node.attributes[attr_name].to_s - - if !attrs.include?(attr_name) || contains_bad_protocols?(attr_name, value) - node.attributes.delete(attr_name) - else - node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value) - end - end if node.attributes - - tags.include?(node.name) ? node : nil - else - sanitized_bad_tags.include?(parent.first) ? nil : node.to_s.gsub(/Welcome to my website!") # # => Welcome to my website! def strip_tags(html) - return html if html.blank? || !html.index("<") - tokenizer = HTML::Tokenizer.new(html) - - text = returning [] do |text| - while token = tokenizer.next - node = HTML::Node.parse(nil, 0, 0, token, false) - # result is only the content of any Text nodes - text << node.to_s if node.class == HTML::Text - end - end - - # strip any comments, and if they have a newline at the end (ie. line with - # only a comment) strip that too - result = text.join.gsub(/[\n]?/m, "") - - # Recurse - handle all dirty nested tags - result == html ? result : strip_tags(result) + self.class.full_sanitizer.sanitize(html) end # Strips all link tags from +text+ leaving just the link text. @@ -160,80 +87,57 @@ module ActionView # strip_links('Blog: Visit.') # # => Blog: Visit def strip_links(html) - if !html.blank? && (html.index("") - tokenizer = HTML::Tokenizer.new(html) - result = returning [] do |result| - while token = tokenizer.next - node = HTML::Node.parse(nil, 0, 0, token, false) - result << node.to_s unless node.is_a?(HTML::Tag) && ["a", "href"].include?(node.name) - end - end.join - result == html ? result : strip_links(result) # Recurse - handle all dirty nested links - else - html - end + self.class.link_sanitizer.sanitize(html) end - # A regular expression of the valid characters used to separate protocols like - # the ':' in 'http://foo.com' - @@sanitized_protocol_separator = /:|(�*58)|(p)|(%|%)3A/ - mattr_accessor :sanitized_protocol_separator, :instance_writer => false - - # Specifies a Set of HTML attributes that can have URIs. - @@sanitized_uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) - mattr_reader :sanitized_uri_attributes - - # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed - # to just escaping harmless tags like <font> - @@sanitized_bad_tags = Set.new(%w(script)) - mattr_reader :sanitized_bad_tags - - # Specifies the default Set of tags that the #sanitize helper will allow unscathed. - @@sanitized_allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr - acronym a img blockquote del ins fieldset legend)) - mattr_reader :sanitized_allowed_tags - - # Specifies the default Set of html attributes that the #sanitize helper will leave - # in the allowed tag. - @@sanitized_allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) - mattr_reader :sanitized_allowed_attributes - - # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - @@sanitized_allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse - border-color border-left-color border-right-color border-top-color clear color cursor direction display - elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height - overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation - speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space - width)) - mattr_reader :sanitized_allowed_css_properties - - # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. - @@sanitized_allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center - collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal - nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) - mattr_reader :sanitized_allowed_css_keywords - - # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. - @@sanitized_shorthand_css_properties = Set.new(%w(background border margin padding)) - mattr_reader :sanitized_shorthand_css_properties - - # Specifies the default Set of protocols that the #sanitize helper will leave in - # protocol attributes. - @@sanitized_allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) - mattr_reader :sanitized_allowed_protocols - module ClassMethods #:nodoc: def self.extended(base) class << base + attr_writer :full_sanitizer, :link_sanitizer, :white_list_sanitizer + # we want these to be class methods on ActionView::Base, they'll get mattr_readers for these below. - [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags, + helper_def = [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags, :sanitized_allowed_attributes, :sanitized_allowed_css_properties, :sanitized_allowed_css_keywords, - :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].each do |prop| - delegate prop, :to => SanitizeHelper - end + :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].collect! do |prop| + prop = prop.to_s + "def #{prop}(#{:value if prop =~ /=$/}) white_list_sanitizer.#{prop.sub /sanitized_/, ''} #{:value if prop =~ /=$/} end" + end.join("\n") + eval helper_def end end + + # Gets the HTML::FullSanitizer instance used by strip_tags. Replace with + # any object that responds to #sanitize + # + # Rails::Initializer.run do |config| + # config.action_view.full_sanitizer = MySpecialSanitizer.new + # end + # + def full_sanitizer + @full_sanitizer ||= HTML::FullSanitizer.new + end + + # Gets the HTML::LinkSanitizer instance used by strip_links. Replace with + # any object that responds to #sanitize + # + # Rails::Initializer.run do |config| + # config.action_view.link_sanitizer = MySpecialSanitizer.new + # end + # + def link_sanitizer + @link_sanitizer ||= HTML::LinkSanitizer.new + end + + # Gets the HTML::WhiteListSanitizer instance used by sanitize and sanitize_css. + # Replace with any object that responds to #sanitize + # + # Rails::Initializer.run do |config| + # config.action_view.white_list_sanitizer = MySpecialSanitizer.new + # end + # + def white_list_sanitizer + @white_list_sanitizer ||= HTML::WhiteListSanitizer.new + end # Adds valid HTML attributes that the #sanitize helper checks for URIs. # @@ -242,7 +146,7 @@ module ActionView # end # def sanitized_uri_attributes=(attributes) - Helpers::SanitizeHelper.sanitized_uri_attributes.merge(attributes) + HTML::WhiteListSanitizer.uri_attributes.merge(attributes) end # Adds to the Set of 'bad' tags for the #sanitize helper. @@ -252,7 +156,7 @@ module ActionView # end # def sanitized_bad_tags=(attributes) - Helpers::SanitizeHelper.sanitized_bad_tags.merge(attributes) + HTML::WhiteListSanitizer.bad_tags.merge(attributes) end # Adds to the Set of allowed tags for the #sanitize helper. # @@ -261,7 +165,7 @@ module ActionView # end # def sanitized_allowed_tags=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_tags.merge(attributes) + HTML::WhiteListSanitizer.allowed_tags.merge(attributes) end # Adds to the Set of allowed html attributes for the #sanitize helper. @@ -271,7 +175,7 @@ module ActionView # end # def sanitized_allowed_attributes=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_attributes.merge(attributes) + HTML::WhiteListSanitizer.allowed_attributes.merge(attributes) end # Adds to the Set of allowed css properties for the #sanitize and #sanitize_css heleprs. @@ -281,7 +185,7 @@ module ActionView # end # def sanitized_allowed_css_properties=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_css_properties.merge(attributes) + HTML::WhiteListSanitizer.allowed_css_properties.merge(attributes) end # Adds to the Set of allowed css keywords for the #sanitize and #sanitize_css helpers. @@ -291,7 +195,7 @@ module ActionView # end # def sanitized_allowed_css_keywords=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_css_keywords.merge(attributes) + HTML::WhiteListSanitizer.allowed_css_keywords.merge(attributes) end # Adds to the Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. @@ -301,7 +205,7 @@ module ActionView # end # def sanitized_shorthand_css_properties=(attributes) - Helpers::SanitizeHelper.sanitized_shorthand_css_properties.merge(attributes) + HTML::WhiteListSanitizer.shorthand_css_properties.merge(attributes) end # Adds to the Set of allowed protocols for the #sanitize helper. @@ -311,15 +215,9 @@ module ActionView # end # def sanitized_allowed_protocols=(attributes) - Helpers::SanitizeHelper.sanitized_allowed_protocols.merge(attributes) + HTML::WhiteListSanitizer.allowed_protocols.merge(attributes) end end - - private - def contains_bad_protocols?(attr_name, value) - sanitized_uri_attributes.include?(attr_name) && - (value =~ /(^[^\/:]*):|(�*58)|(p)|(%|%)3A/ && !sanitized_allowed_protocols.include?(value.split(sanitized_protocol_separator).first)) - end end end end -- cgit v1.2.3