From 9b8ee8e006db581eb34dc0fa1d230653b7a1c956 Mon Sep 17 00:00:00 2001 From: zackham Date: Tue, 2 Apr 2013 13:18:24 -0700 Subject: Escape multibyte line terminators in JSON encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, json/encoding respects the JSON spec (as it should) which disallows \n and \r inside strings, escaping them as expected. Unfortunately, ECMA-262 (Javascript) disallows not only \n and \r in strings, but "Line Terminators" which includes U+2028 and U+2029. See here: http://bclary.com/2004/11/07/#a-7.3 This pull request adds U+2028 and U+2029 to be escaped. # Why?  It's very common to see something like this in a Rails template: If U+2028 or U+2029 are part of any attributes output in the to_json call, you will end up with an exception. In Chrome: Uncaught SyntaxError: Unexpected token ILLEGAL  # Why not? This is JSON encoding, and the JSON spec is specific about how to  encode strings. U+2028 and U+2029 don't get special treatment. Just trying to start a discussion... what do you do in your apps to deal with this? Is there a convention I'm missing? 
--- activesupport/lib/active_support/json/encoding.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/activesupport/lib/active_support/json/encoding.rb b/activesupport/lib/active_support/json/encoding.rb index 9bf1ea35b3..71dfbf3323 100644 --- a/activesupport/lib/active_support/json/encoding.rb +++ b/activesupport/lib/active_support/json/encoding.rb @@ -98,6 +98,8 @@ module ActiveSupport "\010" => '\b', "\f" => '\f', "\n" => '\n', + "\xe2\x80\xa8" => '\u2028', + "\xe2\x80\xa9" => '\u2029', "\r" => '\r', "\t" => '\t', '"' => '\"', @@ -121,9 +123,9 @@ module ActiveSupport def escape_html_entities_in_json=(value) self.escape_regex = \ if @escape_html_entities_in_json = value - /[\x00-\x1F"\\><&]/ + /\xe2\x80(\xa8|\xa9)|[\x00-\x1F"\\><&]/ else - /[\x00-\x1F"\\]/ + /\xe2\x80(\xa8|\xa9)|[\x00-\x1F"\\]/ end end -- cgit v1.2.3 From 582b44175b627e3578fe71e1d452c429022da636 Mon Sep 17 00:00:00 2001 From: Mario Caropreso Date: Thu, 9 May 2013 13:41:56 +0100 Subject: Added escaping of U+2028 and U+2029 inside the json encoder. U+2028 and U+2029 are allowed inside strings in JSON (as all literal Unicode characters) but JavaScript defines them as newline separators. Because no literal newlines are allowed in a string, this causes a ParseError in the browser. We work around this issue by replacing them with the escaped version. The resulting JSON is still valid and can be parsed in the browser. This commit has been coauthored with Viktor Kelemen @yikulju --- activesupport/CHANGELOG.md | 6 ++++++ activesupport/lib/active_support/json/encoding.rb | 9 ++++++--- activesupport/test/json/encoding_test.rb | 4 ++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md index 2e04b17e78..27d4d54afa 100644 --- a/activesupport/CHANGELOG.md +++ b/activesupport/CHANGELOG.md @@ -1,3 +1,9 @@ +* Added escaping of U+2028 and U+2029 inside the json encoder. 
+ These characters are legal in JSON but break the Javascript interpreter. + After escaping them, the JSON is still legal and can be parsed by Javascript. + + *Mario Caropreso* + * Fix skipping object callbacks using metadata fetched via callback chain inspection methods (`_*_callbacks`) diff --git a/activesupport/lib/active_support/json/encoding.rb b/activesupport/lib/active_support/json/encoding.rb index 9bf1ea35b3..db05b09a66 100644 --- a/activesupport/lib/active_support/json/encoding.rb +++ b/activesupport/lib/active_support/json/encoding.rb @@ -104,7 +104,10 @@ module ActiveSupport '\\' => '\\\\', '>' => '\u003E', '<' => '\u003C', - '&' => '\u0026' } + '&' => '\u0026', + "#{0xe2.chr}#{0x80.chr}#{0xa8.chr}" => '\u2028', + "#{0xe2.chr}#{0x80.chr}#{0xa9.chr}" => '\u2029', + } class << self # If true, use ISO 8601 format for dates and times. Otherwise, fall back @@ -121,9 +124,9 @@ module ActiveSupport def escape_html_entities_in_json=(value) self.escape_regex = \ if @escape_html_entities_in_json = value - /[\x00-\x1F"\\><&]/ + /[\x00-\x1F"\\><&]|#{0xe2.chr}#{0x80.chr}#{0xa8.chr}|#{0xe2.chr}#{0x80.chr}#{0xa9.chr}/ else - /[\x00-\x1F"\\]/ + /[\x00-\x1F"\\]|#{0xe2.chr}#{0x80.chr}#{0xa8.chr}|#{0xe2.chr}#{0x80.chr}#{0xa9.chr}/ end end diff --git a/activesupport/test/json/encoding_test.rb b/activesupport/test/json/encoding_test.rb index 8686dcf929..106a7fb522 100644 --- a/activesupport/test/json/encoding_test.rb +++ b/activesupport/test/json/encoding_test.rb @@ -45,8 +45,8 @@ class TestJSONEncoding < ActiveSupport::TestCase StringTests = [[ 'this is the <string>', %("this is the \\u003Cstring\\u003E")], [ 'a "string" with quotes & an ampersand', %("a \\"string\\" with quotes \\u0026 an ampersand") ], [ 'http://test.host/posts/1', %("http://test.host/posts/1")], - [ "Control characters: \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - %("Control characters: 
\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F") ]] + [ "Control characters: \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\342\200\250\342\200\251", + %("Control characters: \\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F\\u2028\\u2029") ]] ArrayTests = [[ ['a', 'b', 'c'], %([\"a\",\"b\",\"c\"]) ], [ [1, 'a', :b, nil, false], %([1,\"a\",\"b\",null,false]) ]] -- cgit v1.2.3 From 9a43816928f07c4ed988fb47545d219eff55e28a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?= Date: Fri, 10 May 2013 14:36:18 -0300 Subject: Fix syntax error on Ruby 2.0 Since Ruby 2.0 is UTF-8 by default we need to explicitly say that the encoding of this file is US-ASCII --- activesupport/lib/active_support/json/encoding.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/activesupport/lib/active_support/json/encoding.rb b/activesupport/lib/active_support/json/encoding.rb index 71dfbf3323..75655f88d3 100644 --- a/activesupport/lib/active_support/json/encoding.rb +++ b/activesupport/lib/active_support/json/encoding.rb @@ -1,3 +1,5 @@ +#encoding: us-ascii + require 'active_support/core_ext/object/to_json' require 'active_support/core_ext/module/delegation' require 'active_support/json/variable' @@ -123,9 +125,9 @@ module ActiveSupport def escape_html_entities_in_json=(value) self.escape_regex = \ if @escape_html_entities_in_json = value - /\xe2\x80(\xa8|\xa9)|[\x00-\x1F"\\><&]/ + /\xe2\x80\xa8|\xe2\x80\xa9|[\x00-\x1F"\\><&]/ else - /\xe2\x80(\xa8|\xa9)|[\x00-\x1F"\\]/ + /\xe2\x80\xa8|\xe2\x80\xa9|[\x00-\x1F"\\]/ end end -- cgit v1.2.3 From 
9d6a5b44320bb4655cf7b633668fed1ab8ddcac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?= Date: Fri, 10 May 2013 14:39:29 -0300 Subject: Give credits to all the involved people [ci skip] --- activesupport/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md index 27d4d54afa..7c666a9b83 100644 --- a/activesupport/CHANGELOG.md +++ b/activesupport/CHANGELOG.md @@ -2,7 +2,7 @@ These characters are legal in JSON but break the Javascript interpreter. After escaping them, the JSON is still legal and can be parsed by Javascript. - *Mario Caropreso* + *Mario Caropreso + Viktor Kelemen + zackham* * Fix skipping object callbacks using metadata fetched via callback chain inspection methods (`_*_callbacks`) -- cgit v1.2.3