From 9b8ee8e006db581eb34dc0fa1d230653b7a1c956 Mon Sep 17 00:00:00 2001 From: zackham Date: Tue, 2 Apr 2013 13:18:24 -0700 Subject: Escape multibyte line terminators in JSON encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, json/encoding respects the JSON spec (as it should), which disallows raw \n and \r inside strings, and escapes them as expected. Unfortunately, ECMA-262 (JavaScript) disallows not only \n and \r in string literals, but every "Line Terminator", a set that also includes U+2028 and U+2029. See here: http://bclary.com/2004/11/07/#a-7.3 This pull request adds U+2028 and U+2029 to the set of characters that are escaped. # Why? It's very common to see something like this in a Rails template: <script type="text/javascript">var posts = <%= @posts.to_json %>;</script> If U+2028 or U+2029 is part of any attribute output by the to_json call, you will end up with an exception in the browser. In Chrome: Uncaught SyntaxError: Unexpected token ILLEGAL # Why not? This is JSON encoding, and the JSON spec is specific about how to encode strings. U+2028 and U+2029 don't get special treatment. Just trying to start a discussion... what do you do in your apps to deal with this? Is there a convention I'm missing? 
--- activesupport/lib/active_support/json/encoding.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'activesupport') diff --git a/activesupport/lib/active_support/json/encoding.rb b/activesupport/lib/active_support/json/encoding.rb index 9bf1ea35b3..71dfbf3323 100644 --- a/activesupport/lib/active_support/json/encoding.rb +++ b/activesupport/lib/active_support/json/encoding.rb @@ -98,6 +98,8 @@ module ActiveSupport "\010" => '\b', "\f" => '\f', "\n" => '\n', + "\xe2\x80\xa8" => '\u2028', + "\xe2\x80\xa9" => '\u2029', "\r" => '\r', "\t" => '\t', '"' => '\"', @@ -121,9 +123,9 @@ module ActiveSupport def escape_html_entities_in_json=(value) self.escape_regex = \ if @escape_html_entities_in_json = value - /[\x00-\x1F"\\><&]/ + /\xe2\x80(\xa8|\xa9)|[\x00-\x1F"\\><&]/ else - /[\x00-\x1F"\\]/ + /\xe2\x80(\xa8|\xa9)|[\x00-\x1F"\\]/ end end -- cgit v1.2.3