aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/core_ext/string/output_safety.rb
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/core_ext/string/output_safety.rb')
-rw-r--r--activesupport/lib/active_support/core_ext/string/output_safety.rb132
1 files changed, 98 insertions, 34 deletions
diff --git a/activesupport/lib/active_support/core_ext/string/output_safety.rb b/activesupport/lib/active_support/core_ext/string/output_safety.rb
index 5f85cedcf5..ba92afd5f4 100644
--- a/activesupport/lib/active_support/core_ext/string/output_safety.rb
+++ b/activesupport/lib/active_support/core_ext/string/output_safety.rb
@@ -1,12 +1,14 @@
require 'erb'
require 'active_support/core_ext/kernel/singleton_class'
+require 'active_support/deprecation'
class ERB
module Util
HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;', "'" => '&#39;' }
- JSON_ESCAPE = { '&' => '\u0026', '>' => '\u003E', '<' => '\u003C' }
- HTML_ESCAPE_ONCE_REGEXP = /["><']|&(?!([a-zA-Z]+|(#\d+));)/
- JSON_ESCAPE_REGEXP = /[&"><]/
+ JSON_ESCAPE = { '&' => '\u0026', '>' => '\u003e', '<' => '\u003c', "\u2028" => '\u2028', "\u2029" => '\u2029' }
+ HTML_ESCAPE_REGEXP = /[&"'><]/
+ HTML_ESCAPE_ONCE_REGEXP = /["><']|&(?!([a-zA-Z]+|(#\d+)|(#[xX][\dA-Fa-f]+));)/
+ JSON_ESCAPE_REGEXP = /[\u2028\u2029&><]/u
# A utility method for escaping HTML tag characters.
# This method is also aliased as <tt>h</tt>.
@@ -17,12 +19,7 @@ class ERB
# puts html_escape('is a > 0 & a < 10?')
# # => is a &gt; 0 &amp; a &lt; 10?
def html_escape(s)
- s = s.to_s
- if s.html_safe?
- s
- else
- s.gsub(/[&"'><]/, HTML_ESCAPE).html_safe
- end
+ unwrapped_html_escape(s).html_safe
end
# Aliasing twice issues a warning "discarding old...". Remove first to avoid it.
@@ -34,6 +31,18 @@ class ERB
singleton_class.send(:remove_method, :html_escape)
module_function :html_escape
+ # HTML escapes strings but doesn't wrap them with an ActiveSupport::SafeBuffer.
+ # This method is not for public consumption! Seriously!
+ def unwrapped_html_escape(s) # :nodoc:
+ s = s.to_s
+ if s.html_safe?
+ s
+ else
+ s.gsub(HTML_ESCAPE_REGEXP, HTML_ESCAPE)
+ end
+ end
+ module_function :unwrapped_html_escape
+
# A utility method for escaping HTML without affecting existing escaped entities.
#
# html_escape_once('1 < 2 &amp; 3')
@@ -42,25 +51,64 @@ class ERB
# html_escape_once('&lt;&lt; Accept & Checkout')
# # => "&lt;&lt; Accept &amp; Checkout"
def html_escape_once(s)
- result = s.to_s.gsub(HTML_ESCAPE_ONCE_REGEXP) { |special| HTML_ESCAPE[special] }
+ result = s.to_s.gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE)
s.html_safe? ? result.html_safe : result
end
module_function :html_escape_once
- # A utility method for escaping HTML entities in JSON strings
- # using \uXXXX JavaScript escape sequences for string literals:
+ # A utility method for escaping HTML entities in JSON strings. Specifically, the
+ # &, > and < characters are replaced with their equivalent unicode escaped form -
+ # \u0026, \u003e, and \u003c. The Unicode sequences \u2028 and \u2029 are also
+ # escaped as they are treated as newline characters in some JavaScript engines.
+ # These sequences have identical meaning as the original characters inside the
+ # context of a JSON string, so assuming the input is a valid and well-formed
+ # JSON value, the output will have equivalent meaning when parsed:
+ #
+ # json = JSON.generate({ name: "</script><script>alert('PWNED!!!')</script>"})
+ # # => "{\"name\":\"</script><script>alert('PWNED!!!')</script>\"}"
+ #
+ # json_escape(json)
+ # # => "{\"name\":\"\\u003C/script\\u003E\\u003Cscript\\u003Ealert('PWNED!!!')\\u003C/script\\u003E\"}"
+ #
+ # JSON.parse(json) == JSON.parse(json_escape(json))
+ # # => true
+ #
+ # The intended use case for this method is to escape JSON strings before including
+ # them inside a script tag to avoid XSS vulnerability:
#
- # json_escape('is a > 0 & a < 10?')
- # # => is a \u003E 0 \u0026 a \u003C 10?
+ # <script>
+ # var currentUser = <%= raw json_escape(current_user.to_json) %>;
+ # </script>
#
- # Note that after this operation is performed the output is not
- # valid JSON. In particular double quotes are removed:
+ # It is necessary to +raw+ the result of +json_escape+, so that quotation marks
+ # don't get converted to <tt>&quot;</tt> entities. +json_escape+ doesn't
+ # automatically flag the result as HTML safe, since the raw value is unsafe to
+ # use inside HTML attributes.
#
- # json_escape('{"name":"john","created_at":"2010-04-28T01:39:31Z","id":1}')
- # # => {name:john,created_at:2010-04-28T01:39:31Z,id:1}
+ # If you need to output JSON elsewhere in your HTML, you can just do something
+ # like this, as any unsafe characters (including quotation marks) will be
+ # automatically escaped for you:
+ #
+ # <div data-user-info="<%= current_user.to_json %>">...</div>
+ #
+ # WARNING: this helper only works with valid JSON. Using this on non-JSON values
+ # will open up serious XSS vulnerabilities. For example, if you replace the
+ # +current_user.to_json+ in the example above with user input instead, the browser
+ # will happily eval() that string as JavaScript.
+ #
+ # The escaping performed in this method is identical to those performed in the
+ # Active Support JSON encoder when +ActiveSupport.escape_html_entities_in_json+ is
+ # set to true. Because this transformation is idempotent, this helper can be
+ # applied even if +ActiveSupport.escape_html_entities_in_json+ is already true.
+ #
+ # Therefore, when you are unsure if +ActiveSupport.escape_html_entities_in_json+
+ # is enabled, or if you are unsure where your JSON string originated from, it
+ # is recommended that you always apply this helper (other libraries, such as the
+ # JSON gem, do not provide this kind of protection by default; also some gems
+ # might override +to_json+ to bypass Active Support's encoder).
def json_escape(s)
- result = s.to_s.gsub(JSON_ESCAPE_REGEXP) { |special| JSON_ESCAPE[special] }
+ result = s.to_s.gsub(JSON_ESCAPE_REGEXP, JSON_ESCAPE)
s.html_safe? ? result.html_safe : result
end
@@ -84,7 +132,7 @@ module ActiveSupport #:nodoc:
class SafeBuffer < String
UNSAFE_STRING_METHODS = %w(
capitalize chomp chop delete downcase gsub lstrip next reverse rstrip
- slice squeeze strip sub succ swapcase tr tr_s upcase prepend
+ slice squeeze strip sub succ swapcase tr tr_s upcase
)
alias_method :original_concat, :concat
@@ -102,7 +150,7 @@ module ActiveSupport #:nodoc:
else
if html_safe?
new_safe_buffer = super
- new_safe_buffer.instance_eval { @html_safe = true }
+ new_safe_buffer.instance_variable_set :@html_safe, true
new_safe_buffer
else
to_str[*args]
@@ -130,28 +178,32 @@ module ActiveSupport #:nodoc:
end
def concat(value)
- if !html_safe? || value.html_safe?
- super(value)
- else
- super(ERB::Util.h(value))
- end
+ super(html_escape_interpolated_argument(value))
end
alias << concat
+ def prepend(value)
+ super(html_escape_interpolated_argument(value))
+ end
+
+ def prepend!(value)
+ ActiveSupport::Deprecation.deprecation_warning "ActiveSupport::SafeBuffer#prepend!", :prepend
+ prepend value
+ end
+
def +(other)
dup.concat(other)
end
def %(args)
- args = Array(args).map do |arg|
- if !html_safe? || arg.html_safe?
- arg
- else
- ERB::Util.h(arg)
- end
+ case args
+ when Hash
+ escaped_args = Hash[args.map { |k,arg| [k, html_escape_interpolated_argument(arg)] }]
+ else
+ escaped_args = Array(args).map { |arg| html_escape_interpolated_argument(arg) }
end
- self.class.new(super(args))
+ self.class.new(super(escaped_args))
end
def html_safe?
@@ -171,7 +223,7 @@ module ActiveSupport #:nodoc:
end
UNSAFE_STRING_METHODS.each do |unsafe_method|
- if 'String'.respond_to?(unsafe_method)
+ if unsafe_method.respond_to?(unsafe_method)
class_eval <<-EOT, __FILE__, __LINE__ + 1
def #{unsafe_method}(*args, &block) # def capitalize(*args, &block)
to_str.#{unsafe_method}(*args, &block) # to_str.capitalize(*args, &block)
@@ -184,10 +236,22 @@ module ActiveSupport #:nodoc:
EOT
end
end
+
+ private
+
+ def html_escape_interpolated_argument(arg)
+ (!html_safe? || arg.html_safe?) ? arg :
+ arg.to_s.gsub(ERB::Util::HTML_ESCAPE_REGEXP, ERB::Util::HTML_ESCAPE)
+ end
end
end
class String
+ # Marks a string as trusted safe. It will be inserted into HTML with no
+ # additional escaping performed. It is your responsibilty to ensure that the
+ # string contains no malicious content. This method is equivalent to the
+ # `raw` helper in views. It is recommended that you use `sanitize` instead of
+ # this method. It should never be called on user input.
def html_safe
ActiveSupport::SafeBuffer.new(self)
end