From 05a2a6a0c5ac2384e52df9b8c2aa81352a51d7c7 Mon Sep 17 00:00:00 2001 From: Grey Baker Date: Sun, 3 May 2015 15:04:07 +0100 Subject: Handle invalid UTF-8 strings when HTML escaping Use `ActiveSupport::Multibyte::Unicode.tidy_bytes` to handle invalid UTF-8 strings in `ERB::Util.unwrapped_html_escape` and `ERB::Util.html_escape_once`. Prevents user-entered input passed from a querystring into a form field from causing invalid byte sequence errors. --- activesupport/CHANGELOG.md | 9 +++++++++ .../lib/active_support/core_ext/string/output_safety.rb | 4 ++-- activesupport/test/core_ext/string_ext_test.rb | 10 ++++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'activesupport') diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md index 6ebbdbc3db..c001ed1bc4 100644 --- a/activesupport/CHANGELOG.md +++ b/activesupport/CHANGELOG.md @@ -1,3 +1,12 @@ +* Handle invalid UTF-8 strings when HTML escaping + + Use `ActiveSupport::Multibyte::Unicode.tidy_bytes` to handle invalid UTF-8 + strings in `ERB::Util.unwrapped_html_escape` and `ERB::Util.html_escape_once`. + Prevents user-entered input passed from a querystring into a form field from + causing invalid byte sequence errors. + + *Grey Baker* + * Fix a range of values for parameters of the Time#change *Nikolay Kondratyev* diff --git a/activesupport/lib/active_support/core_ext/string/output_safety.rb b/activesupport/lib/active_support/core_ext/string/output_safety.rb index c676b26b06..084f6fecda 100644 --- a/activesupport/lib/active_support/core_ext/string/output_safety.rb +++ b/activesupport/lib/active_support/core_ext/string/output_safety.rb @@ -37,7 +37,7 @@ class ERB if s.html_safe? 
s else - s.gsub(HTML_ESCAPE_REGEXP, HTML_ESCAPE) + ActiveSupport::Multibyte::Unicode.tidy_bytes(s).gsub(HTML_ESCAPE_REGEXP, HTML_ESCAPE) end end module_function :unwrapped_html_escape @@ -50,7 +50,7 @@ class ERB # html_escape_once('&lt;&lt; Accept & Checkout') # # => "&lt;&lt; Accept &amp; Checkout" def html_escape_once(s) - result = s.to_s.gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE) + result = ActiveSupport::Multibyte::Unicode.tidy_bytes(s.to_s).gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE) s.html_safe? ? result.html_safe : result end diff --git a/activesupport/test/core_ext/string_ext_test.rb b/activesupport/test/core_ext/string_ext_test.rb index 3a5d6df06d..9cc7bb1a77 100644 --- a/activesupport/test/core_ext/string_ext_test.rb +++ b/activesupport/test/core_ext/string_ext_test.rb @@ -782,8 +782,8 @@ class OutputSafetyTest < ActiveSupport::TestCase end test "ERB::Util.html_escape should correctly handle invalid UTF-8 strings" do - string = [192, 60].pack('CC') - expected = 192.chr + "&lt;" + string = "\251 <" + expected = "© &lt;" assert_equal expected, ERB::Util.html_escape(string) end @@ -799,6 +799,12 @@ class OutputSafetyTest < ActiveSupport::TestCase assert_equal escaped_string, ERB::Util.html_escape_once(string) assert_equal escaped_string, ERB::Util.html_escape_once(escaped_string) end + + test "ERB::Util.html_escape_once should correctly handle invalid UTF-8 strings" do + string = "\251 <" + expected = "© &lt;" + assert_equal expected, ERB::Util.html_escape_once(string) + end end class StringExcludeTest < ActiveSupport::TestCase -- cgit v1.2.3