diff options
author | David Heinemeier Hansson <david@loudthinking.com> | 2007-10-10 17:35:10 +0000 |
---|---|---|
committer | David Heinemeier Hansson <david@loudthinking.com> | 2007-10-10 17:35:10 +0000 |
commit | 6637f9069c8bd819edc175cca13b667861aa3a0b (patch) | |
tree | 688b17b41ed5d886ff9e18a3ad5b9f5cdeee6184 /actionpack/lib/action_view/helpers/sanitize_helper.rb | |
parent | 1c881ca5bfc36689b4918edf497b38d24df7b57e (diff) | |
download | rails-6637f9069c8bd819edc175cca13b667861aa3a0b.tar.gz rails-6637f9069c8bd819edc175cca13b667861aa3a0b.tar.bz2 rails-6637f9069c8bd819edc175cca13b667861aa3a0b.zip |
Extracted sanitization methods from TextHelper to SanitizeHelper [DHH] Changed SanitizeHelper#sanitize to only allow the custom attributes and tags when specified in the call [DHH]
git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@7825 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
Diffstat (limited to 'actionpack/lib/action_view/helpers/sanitize_helper.rb')
-rw-r--r-- | actionpack/lib/action_view/helpers/sanitize_helper.rb | 325 |
1 files changed, 325 insertions, 0 deletions
diff --git a/actionpack/lib/action_view/helpers/sanitize_helper.rb b/actionpack/lib/action_view/helpers/sanitize_helper.rb new file mode 100644 index 0000000000..e67abd9f67 --- /dev/null +++ b/actionpack/lib/action_view/helpers/sanitize_helper.rb @@ -0,0 +1,325 @@ +require 'action_view/helpers/tag_helper' +require 'html/document' + +module ActionView + module Helpers #:nodoc: + # The SanitizeHelper module provides a set of methods for scrubbing text of undesired HTML elements. + # These helper methods extend ActionView making them callable within your template files. + module SanitizeHelper + def self.included(base) + base.extend(ClassMethods) + end + + # This #sanitize helper will html encode all tags and strip all attributes that aren't specifically allowed. + # It also strips href/src tags with invalid protocols, like javascript: especially. It does its best to counter any + # tricks that hackers may use, like throwing in unicode/ascii/hex values to get past the javascript: filters. Check out + # the extensive test suite. + # + # <%= sanitize @article.body %> + # + # You can add or remove tags/attributes if you want to customize it a bit. See ActionView::Base for full docs on the + # available options. You can add tags/attributes for single uses of #sanitize by passing either the :attributes or :tags options: + # + # Normal Use + # + # <%= sanitize @article.body %> + # + # Custom Use (only the mentioned tags and attributes are allowed, nothing else) + # + # <%= sanitize @article.body, :tags => %w(table tr td), :attributes => %w(id class style) + # + # Add table tags to the default allowed tags + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td' + # end + # + # Remove tags to the default allowed tags + # + # Rails::Initializer.run do |config| + # config.after_initialize do + # ActionView::Base.sanitized_allowed_tags.delete 'div' + # end + # end + # + # Change allowed default attributes + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_attributes = 'id', 'class', 'style' + # end + # + def sanitize(html, options = {}) + return html if html.blank? || !html.include?('<') + + attrs = options[:attributes] || sanitized_allowed_attributes + tags = options[:tags] || sanitized_allowed_tags + + returning [] do |new_text| + tokenizer = HTML::Tokenizer.new(html) + parent = [] + + while token = tokenizer.next + node = HTML::Node.parse(nil, 0, 0, token, false) + + new_text << case node + when HTML::Tag + if node.closing == :close + parent.shift + else + parent.unshift node.name + end + + node.attributes.keys.each do |attr_name| + value = node.attributes[attr_name].to_s + + if !attrs.include?(attr_name) || contains_bad_protocols?(attr_name, value) + node.attributes.delete(attr_name) + else + node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value) + end + end if node.attributes + + tags.include?(node.name) ? node : nil + else + sanitized_bad_tags.include?(parent.first) ? nil : node.to_s.gsub(/</, "<") + end + end + end.join + end + + # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute + def sanitize_css(style) + # disallow urls + style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') + + # gauntlet + if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ || + style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/ + return '' + end + + returning [] do |clean| + style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| + if sanitized_allowed_css_properties.include?(prop.downcase) + clean << prop + ': ' + val + ';' + elsif sanitized_shorthand_css_properties.include?(prop.split('-')[0].downcase) + unless val.split().any? do |keyword| + !sanitized_allowed_css_keywords.include?(keyword) && + keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ + end + clean << prop + ': ' + val + ';' + end + end + end + end.join(' ') + end + + # Strips all HTML tags from the +html+, including comments. This uses the + # html-scanner tokenizer and so its HTML parsing ability is limited by + # that of html-scanner. + # + # ==== Examples + # + # strip_tags("Strip <i>these</i> tags!") + # # => Strip these tags! + # + # strip_tags("<b>Bold</b> no more! <a href='more.html'>See more here</a>...") + # # => Bold no more! See more here... + # + # strip_tags("<div id='top-bar'>Welcome to my website!</div>") + # # => Welcome to my website! + def strip_tags(html) + return html if html.blank? || !html.index("<") + tokenizer = HTML::Tokenizer.new(html) + + text = returning [] do |text| + while token = tokenizer.next + node = HTML::Node.parse(nil, 0, 0, token, false) + # result is only the content of any Text nodes + text << node.to_s if node.class == HTML::Text + end + end + + # strip any comments, and if they have a newline at the end (ie. line with + # only a comment) strip that too + result = text.join.gsub(/<!--(.*?)-->[\n]?/m, "") + + # Recurse - handle all dirty nested tags + result == html ? result : strip_tags(result) + end + + # Strips all link tags from +text+ leaving just the link text. + # + # ==== Examples + # strip_links('<a href="http://www.rubyonrails.org">Ruby on Rails</a>') + # # => Ruby on Rails + # + # strip_links('Please e-mail me at <a href="mailto:me@email.com">me@email.com</a>.') + # # => Please e-mail me at me@email.com. + # + # strip_links('Blog: <a href="http://www.myblog.com/" class="nav" target=\"_blank\">Visit</a>.') + # # => Blog: Visit + def strip_links(html) + if !html.blank? && (html.index("<a") || html.index("<href")) && html.index(">") + tokenizer = HTML::Tokenizer.new(html) + result = returning [] do |result| + while token = tokenizer.next + node = HTML::Node.parse(nil, 0, 0, token, false) + result << node.to_s unless node.is_a?(HTML::Tag) && ["a", "href"].include?(node.name) + end + end.join + result == html ? result : strip_links(result) # Recurse - handle all dirty nested links + else + html + end + end + + # A regular expression of the valid characters used to separate protocols like + # the ':' in 'http://foo.com' + @@sanitized_protocol_separator = /:|(�*58)|(p)|(%|%)3A/ + mattr_accessor :sanitized_protocol_separator, :instance_writer => false + + # Specifies a Set of HTML attributes that can have URIs. + @@sanitized_uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) + mattr_reader :sanitized_uri_attributes + + # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed + # to just escaping harmless tags like <font> + @@sanitized_bad_tags = Set.new(%w(script)) + mattr_reader :sanitized_bad_tags + + # Specifies the default Set of tags that the #sanitize helper will allow unscathed. + @@sanitized_allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr + acronym a img blockquote del ins fieldset legend)) + mattr_reader :sanitized_allowed_tags + + # Specifies the default Set of html attributes that the #sanitize helper will leave + # in the allowed tag. + @@sanitized_allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) + mattr_reader :sanitized_allowed_attributes + + # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. + @@sanitized_allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse + border-color border-left-color border-right-color border-top-color clear color cursor direction display + elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height + overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation + speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space + width)) + mattr_reader :sanitized_allowed_css_properties + + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. + @@sanitized_allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center + collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal + nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) + mattr_reader :sanitized_allowed_css_keywords + + # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. + @@sanitized_shorthand_css_properties = Set.new(%w(background border margin padding)) + mattr_reader :sanitized_shorthand_css_properties + + # Specifies the default Set of protocols that the #sanitize helper will leave in + # protocol attributes. + @@sanitized_allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) + mattr_reader :sanitized_allowed_protocols + + module ClassMethods #:nodoc: + def self.extended(base) + class << base + # we want these to be class methods on ActionView::Base, they'll get mattr_readers for these below. + [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags, + :sanitized_allowed_attributes, :sanitized_allowed_css_properties, :sanitized_allowed_css_keywords, + :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].each do |prop| + delegate prop, :to => SanitizeHelper + end + end + end + + # Adds valid HTML attributes that the #sanitize helper checks for URIs. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_uri_attributes = 'lowsrc', 'target' + # end + # + def sanitized_uri_attributes=(attributes) + Helpers::SanitizeHelper.sanitized_uri_attributes.merge(attributes) + end + + # Adds to the Set of 'bad' tags for the #sanitize helper. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_bad_tags = 'embed', 'object' + # end + # + def sanitized_bad_tags=(attributes) + Helpers::SanitizeHelper.sanitized_bad_tags.merge(attributes) + end + # Adds to the Set of allowed tags for the #sanitize helper. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td' + # end + # + def sanitized_allowed_tags=(attributes) + Helpers::SanitizeHelper.sanitized_allowed_tags.merge(attributes) + end + + # Adds to the Set of allowed html attributes for the #sanitize helper. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_attributes = 'onclick', 'longdesc' + # end + # + def sanitized_allowed_attributes=(attributes) + Helpers::SanitizeHelper.sanitized_allowed_attributes.merge(attributes) + end + + # Adds to the Set of allowed css properties for the #sanitize and #sanitize_css heleprs. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_css_properties = 'expression' + # end + # + def sanitized_allowed_css_properties=(attributes) + Helpers::SanitizeHelper.sanitized_allowed_css_properties.merge(attributes) + end + + # Adds to the Set of allowed css keywords for the #sanitize and #sanitize_css helpers. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_css_keywords = 'expression' + # end + # + def sanitized_allowed_css_keywords=(attributes) + Helpers::SanitizeHelper.sanitized_allowed_css_keywords.merge(attributes) + end + + # Adds to the Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_shorthand_css_properties = 'expression' + # end + # + def sanitized_shorthand_css_properties=(attributes) + Helpers::SanitizeHelper.sanitized_shorthand_css_properties.merge(attributes) + end + + # Adds to the Set of allowed protocols for the #sanitize helper. + # + # Rails::Initializer.run do |config| + # config.action_view.sanitized_allowed_protocols = 'ssh', 'feed' + # end + # + def sanitized_allowed_protocols=(attributes) + Helpers::SanitizeHelper.sanitized_allowed_protocols.merge(attributes) + end + end + + private + def contains_bad_protocols?(attr_name, value) + sanitized_uri_attributes.include?(attr_name) && + (value =~ /(^[^\/:]*):|(�*58)|(p)|(%|%)3A/ && !sanitized_allowed_protocols.include?(value.split(sanitized_protocol_separator).first)) + end + end + end +end |